// 文档 https://github.com/hooke007/MPV_lazy/wiki/4_GLSL

// CuNNy 4x12 SOFT
// Copyright (c) 2024 funnyplanter

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3.0 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program.  If not, see <https://www.gnu.org/licenses/>.
/* ------------------------------------------------------------------- */


//!DESC [CuNNy_4x12_SOFT] -in
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND LUMA
//!SAVE in
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Pass "-in": reads the LUMA plane and saves a 4-component texture named
// "in" that is three times as wide as LUMA and has the same height.
// The WHEN expression is written in reverse Polish notation: the pass only
// runs when OUTPUT.w > 1.2 * LUMA.w and OUTPUT.h > 1.2 * LUMA.h (the two
// comparison results are multiplied together).
// COMPUTE 24 8 8 8: per the mpv user-shader documentation each workgroup
// covers a 24x8 block of the output with 8x8 threads; hook() below writes
// three horizontally adjacent output texels per thread to match.
// NOTE: keep the directive lines above contiguous; do not insert ordinary
// comments between them.

// Prefer 16-bit float types when the extension is available and fall back
// to 32-bit types otherwise. V4/M4/F are the vector, matrix and scalar
// aliases used by the rest of this pass.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0(x, y): red channel of the LUMA texel at pos + (x, y), scaled by
// LUMA_mul and converted to F. The coordinate is clamped to [0, sz], so
// reads past the image border repeat the edge texel. The macro expects
// `pos` and `sz` to be in scope where it is expanded.
#define l0(x, y) F((LUMA_mul * texelFetch(LUMA_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(1, 1) + ivec2(0, 0), 0)).r)
// Workgroup-shared 10x10 tile of input samples: the 8x8 pixels handled by
// the workgroup plus a one-texel border on every side (filled in hook()).
shared F G[1][10][10];
// Entry point of the "-in" pass.
// Each invocation handles one LUMA pixel: it helps fill the shared 10x10
// tile G, then combines the 3x3 neighbourhood around its pixel with fixed
// weights into three 4-component results (r0, r1, r2) and stores them at
// three horizontally adjacent texels of the output image.
// NOTE(review): out_image, LUMA_raw, LUMA_mul and LUMA_size are not declared
// in this file; presumably they are injected by the host (mpv) for compute
// passes -- confirm against the mpv documentation.
void hook() {
	// xy: position inside the 8x8 workgroup; pos: LUMA pixel of this invocation.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// opos: first of the three output texels written for this pixel.
	ivec2 opos = pos * ivec2(3, 1);
	// sz: largest valid LUMA coordinate, used by l0() for edge clamping.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative tile load: with offsets 0 and 8 on each axis, the 8x8
	// invocations cover all 10x10 entries; indices >= 10 are skipped.
	// The (x - 1, y - 1) offset makes G[0][ay][ax] hold the texel at
	// (workgroup origin) + (ax - 1, ay - 1).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
		}
	}
	// Wait until every invocation of the workgroup has stored its samples.
	barrier();
	// s0_<row>_<col>: the 3x3 neighbourhood around pos (s0_1_1 is pos itself).
	F s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2];
	// Weighted sum: each tap contributes (constant weight vector) * (scalar
	// sample) to each of the three accumulators.
	r0 += V4(2.322e-02, -1.334e-02, -3.813e-01, 7.910e-03) * s0_0_0;
	r1 += V4(5.275e-01, 4.515e-03, -1.155e-02, -1.005e-02) * s0_0_0;
	r2 += V4(6.457e-03, -2.327e-01, 1.199e-02, 8.356e-03) * s0_0_0;
	r0 += V4(-2.629e-02, 9.310e-03, 2.420e-01, 2.029e-02) * s0_0_1;
	r1 += V4(-5.410e-01, -2.006e-02, -2.862e-02, 1.862e-02) * s0_0_1;
	r2 += V4(9.178e-01, 4.564e-01, 9.718e-01, 5.527e-01) * s0_0_1;
	r0 += V4(-5.596e-03, -2.557e-03, 9.555e-02, -3.055e-02) * s0_0_2;
	r1 += V4(3.034e-02, 4.269e-03, 6.556e-02, -3.897e-02) * s0_0_2;
	r2 += V4(-9.238e-01, -1.487e-02, 2.676e-02, -1.692e-02) * s0_0_2;
	r0 += V4(-2.434e-02, -8.784e-01, 1.569e-01, -2.853e-02) * s0_1_0;
	r1 += V4(-5.235e-01, -3.186e-02, 2.751e-02, -1.664e-02) * s0_1_0;
	r2 += V4(-1.066e-02, -8.471e-02, -1.319e-02, -2.474e-03) * s0_1_0;
	r0 += V4(5.137e-01, 8.809e-01, -2.154e-01, 6.148e-01) * s0_1_1;
	r1 += V4(2.204e-01, -9.160e-01, -1.003e+00, -1.794e-01) * s0_1_1;
	r2 += V4(4.694e-02, -2.050e-02, -9.473e-01, -5.840e-01) * s0_1_1;
	r0 += V4(-7.205e-03, 2.723e-03, 6.471e-02, -2.853e-01) * s0_1_2;
	r1 += V4(2.836e-01, -2.596e-02, 4.541e-01, -3.645e-02) * s0_1_2;
	r2 += V4(-3.672e-02, -5.897e-02, -4.198e-02, -5.657e-02) * s0_1_2;
	r0 += V4(-3.135e-02, -9.858e-03, 1.565e-01, 4.558e-03) * s0_2_0;
	r1 += V4(1.898e-02, 2.736e-02, 4.620e-03, 2.843e-02) * s0_2_0;
	r2 += V4(4.234e-03, -4.465e-03, 2.688e-03, 4.187e-02) * s0_2_0;
	r0 += V4(-4.582e-01, 9.960e-03, -1.802e-01, -1.242e-01) * s0_2_1;
	r1 += V4(3.098e-01, 9.331e-01, 1.831e-01, 5.111e-01) * s0_2_1;
	r2 += V4(-1.963e-03, 5.862e-02, -2.674e-02, -2.304e-02) * s0_2_1;
	r0 += V4(-6.111e-02, -5.471e-05, 1.940e-02, -7.496e-02) * s0_2_2;
	r1 += V4(-3.271e-01, 2.240e-02, 1.412e-01, -3.351e-01) * s0_2_2;
	r2 += V4(-3.684e-03, -4.067e-02, 1.415e-02, -1.691e-02) * s0_2_2;
	// Add a per-component constant to each accumulator, clamp to [0, 1] and
	// store the three results side by side at opos + (0..2, 0).
	r0 += V4(-2.169e-03, -2.010e-07, -2.248e-02, 8.771e-03);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-5.858e-03, -4.679e-06, -1.530e-02, -3.276e-03);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-1.539e-04, -7.213e-03, -1.678e-04, -1.016e-03);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_4x12_SOFT] -conv1
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND in
//!BIND LUMA
//!SAVE conv1
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Pass "-conv1": reads the texture "in" (LUMA is bound as well and supplies
// LUMA_size) and saves a 4-component texture named "conv1", three times as
// wide as LUMA and of the same height.
// The WHEN expression is written in reverse Polish notation: the pass only
// runs when OUTPUT.w > 1.2 * LUMA.w and OUTPUT.h > 1.2 * LUMA.h.
// NOTE: keep the directive lines above contiguous; do not insert ordinary
// comments between them.

// Prefer 16-bit float types when the extension is available and fall back
// to 32-bit types otherwise.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): the three horizontally adjacent texels of "in" that belong
// to the LUMA-space pixel pos + (x, y). The pixel coordinate is clamped to
// [0, sz] first, then scaled by (3, 1) and offset by 0, 1 or 2 columns.
// Each result is scaled by in_mul and converted to V4. The macros expect
// `pos` and `sz` to be in scope where they are expanded.
#define l0(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tiles, one 10x10 tile per input texel column (l0, l1, l2):
// the 8x8 pixels handled by the workgroup plus a one-texel border.
shared V4 G[3][10][10];
// Entry point of the "-conv1" pass.
// Each invocation handles one LUMA-space pixel: it helps fill the three
// shared 10x10 tiles in G from the "in" texture (via the l0/l1/l2 macros
// defined above), then multiplies every sample of the 3x3 neighbourhood in
// each tile by a constant 4x4 matrix and accumulates the products into r0,
// r1 and r2. The results are clamped to [0, 1] and written to three
// horizontally adjacent texels of the output image. Unlike the "-in" pass,
// no constant is added before the clamp.
// NOTE(review): out_image, in_raw, in_mul and LUMA_size are not declared in
// this file; presumably they are injected by the host (mpv) -- confirm.
void hook() {
	// xy: position inside the 8x8 workgroup; pos: pixel of this invocation.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// opos: first of the three output texels written for this pixel.
	ivec2 opos = pos * ivec2(3, 1);
	// sz: largest valid LUMA-space coordinate, used by the macros for clamping.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative tile load: offsets 0 and 8 on each axis let the 8x8
	// invocations cover all 10x10 entries; indices >= 10 are skipped.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// Wait until every invocation of the workgroup has stored its samples.
	barrier();
	// s0_* / s1_*: 3x3 neighbourhoods (row, column) taken from G[0] and G[1];
	// the s0_* variables are reloaded from G[2] further down.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Contributions of the samples from G[0] (s0_*).
	r0 += M4(-3.534e-01, -9.137e-01, -2.860e-01, -8.732e-02, 5.888e-03, -1.577e-01, -6.763e-02, 9.140e-02, 3.884e-02, 3.933e-02, 6.648e-03, 1.487e-01, -3.342e-01, -4.014e-01, -2.235e-01, 2.684e-02) * s0_0_0;
	r1 += M4(-5.224e-01, -3.517e-01, -7.296e-01, -4.749e-02, 7.787e-02, -9.087e-04, -6.613e-03, -3.254e-02, 1.510e-03, -3.951e-02, -1.017e-01, -5.031e-02, -6.230e-01, -1.642e-01, 9.151e-03, 1.254e-01) * s0_0_0;
	r2 += M4(2.405e-01, -1.414e-01, 2.430e-01, 1.930e-01, -2.948e-01, 2.065e-01, -1.487e-02, 2.389e-02, -5.812e-02, 7.583e-02, 1.818e-02, 1.667e-01, 3.245e-01, 4.849e-01, 1.248e-01, -2.327e-02) * s0_0_0;
	r0 += M4(-4.899e-02, 1.121e-01, 2.742e-02, -1.000e+00, -1.223e-01, 6.866e-02, -2.231e-01, 1.474e-01, 5.508e-02, 9.657e-02, -9.937e-03, 6.904e-02, -3.643e-01, -9.599e-02, 1.711e-01, -2.329e-01) * s0_0_1;
	r1 += M4(-5.686e-01, 3.700e-01, -5.410e-01, -6.676e-02, -1.448e-01, 9.957e-03, 2.549e-01, 4.601e-02, 4.268e-02, 9.974e-02, -6.264e-02, -1.074e-02, -4.729e-03, -3.355e-01, -1.426e-01, 9.584e-02) * s0_0_1;
	r2 += M4(1.000e+00, -2.627e-01, -5.993e-01, 1.000e+00, 5.280e-01, 3.389e-01, -5.465e-01, -5.101e-02, -8.530e-02, 4.582e-02, -2.259e-01, -2.847e-01, -9.519e-03, -8.743e-02, 7.771e-02, 2.209e-02) * s0_0_1;
	r0 += M4(7.943e-02, 1.300e-01, 1.259e-01, -1.000e+00, 1.333e-01, 1.848e-01, -4.722e-01, -1.821e-01, 5.861e-02, 1.815e-02, 1.568e-01, 2.338e-01, -1.514e-01, -9.747e-02, 1.030e-01, 1.793e-01) * s0_0_2;
	r1 += M4(-1.776e-01, -1.816e-01, 7.504e-01, 9.125e-02, 3.573e-01, -3.556e-02, -1.267e-01, 1.287e-01, -8.411e-05, -1.262e-01, 1.902e-02, -2.912e-02, -1.573e-01, 3.877e-01, -2.082e-01, -1.488e-02) * s0_0_2;
	r2 += M4(-2.852e-01, 2.944e-01, 3.747e-01, 2.310e-01, 2.351e-01, 8.708e-02, -4.059e-01, 2.001e-01, -1.464e-02, 2.760e-01, 3.140e-01, 4.669e-01, 3.604e-01, -1.865e-01, -1.966e-01, -3.994e-01) * s0_0_2;
	r0 += M4(1.174e-01, 1.000e+00, -4.619e-01, -3.654e-01, 2.433e-02, -1.578e-01, 8.418e-02, -5.218e-04, 1.463e-02, 3.167e-02, 1.781e-01, -2.876e-02, -1.164e-01, 5.959e-01, -5.039e-01, -7.957e-01) * s0_1_0;
	r1 += M4(-6.107e-01, -2.126e-01, -3.087e-01, -5.285e-01, 4.671e-02, 7.300e-02, 1.052e-01, 5.913e-02, 6.816e-02, 9.511e-02, 5.408e-04, 8.298e-02, 1.041e-03, -2.475e-01, 4.503e-01, -6.921e-01) * s0_1_0;
	r2 += M4(-2.042e-01, 3.547e-01, 6.857e-01, -4.951e-01, 2.374e-01, -2.805e-01, -5.994e-02, -1.128e-01, 7.433e-02, -1.028e-01, 1.784e-01, 3.247e-03, -3.855e-01, -8.779e-02, 7.322e-01, 4.219e-01) * s0_1_0;
	r0 += M4(-1.008e-01, -3.356e-01, 2.566e-01, 5.248e-01, 9.990e-02, -1.000e+00, -6.081e-02, -4.740e-01, 1.281e-01, -2.351e-01, 1.437e-01, 4.272e-02, 1.000e+00, -3.801e-01, -7.531e-01, 7.698e-01) * s0_1_1;
	r1 += M4(-4.541e-01, -1.777e-01, 1.189e-01, 7.291e-02, -3.103e-02, 1.515e-02, -3.138e-01, 1.190e-01, -1.087e-01, 2.154e-01, -5.847e-01, -8.845e-04, 8.757e-01, -1.748e-01, 9.902e-01, 9.268e-01) * s0_1_1;
	r2 += M4(-7.101e-01, -2.732e-01, -6.469e-01, 2.153e-01, -1.000e+00, -1.379e-01, 2.398e-01, -1.000e+00, 1.845e-01, -1.092e-01, 1.441e-01, 4.209e-01, -3.623e-02, -5.604e-01, -5.988e-01, 1.031e-01) * s0_1_1;
	r0 += M4(-3.164e-01, -7.754e-01, 5.469e-01, 2.321e-01, 2.204e-01, 1.373e-01, -2.448e-01, -1.000e+00, 3.054e-02, -1.538e-01, -4.497e-01, -1.868e-01, -7.199e-02, 6.372e-02, 6.917e-01, -4.094e-01) * s0_1_2;
	r1 += M4(5.272e-01, 5.487e-01, -6.263e-01, 1.665e-01, -5.816e-01, 5.966e-01, 1.247e-01, 2.191e-01, -1.852e-01, -5.812e-02, -5.336e-02, -7.397e-02, 1.266e-01, 2.094e-01, -1.364e-01, 1.525e-01) * s0_1_2;
	r2 += M4(1.000e+00, 1.401e-01, -2.484e-01, 4.103e-01, -6.309e-01, 2.960e-01, -3.950e-02, 6.402e-01, -2.835e-01, -2.822e-01, 3.563e-01, -7.118e-01, 1.025e-01, 2.026e-01, 9.946e-02, -4.704e-01) * s0_1_2;
	r0 += M4(2.082e-02, 4.770e-02, 1.062e-01, -7.428e-02, 3.296e-03, -3.844e-02, 4.015e-02, -1.857e-02, -4.696e-02, 1.638e-01, -4.977e-02, 2.202e-01, 3.937e-02, -1.866e-01, 1.918e-01, 3.947e-01) * s0_2_0;
	r1 += M4(7.603e-02, 3.155e-02, -8.418e-03, 3.284e-02, 2.872e-02, -1.922e-02, 9.400e-02, 5.063e-02, 1.534e-02, -1.342e-01, 1.110e-02, 4.255e-02, -2.530e-01, 1.150e-01, -2.418e-01, -9.809e-02) * s0_2_0;
	r2 += M4(2.724e-01, 6.911e-02, 2.421e-02, -1.935e-01, -1.218e-01, 2.599e-02, -5.382e-02, 9.936e-02, -2.527e-01, -7.294e-02, 1.053e-01, -1.115e-01, 3.736e-01, 1.109e-01, 7.761e-02, 2.606e-01) * s0_2_0;
	r0 += M4(-9.720e-02, 6.085e-02, -3.630e-01, 7.545e-02, 4.136e-02, -5.788e-01, 1.871e-01, 1.791e-01, 6.192e-02, 4.000e-01, -2.886e-01, 2.113e-01, 1.065e-01, 2.975e-01, 2.365e-01, 8.166e-02) * s0_2_1;
	r1 += M4(3.361e-02, -1.388e-01, 3.174e-01, 1.228e-01, 1.608e-01, 1.641e-02, -2.505e-01, -1.565e-01, 2.700e-01, -3.271e-02, -5.926e-03, -6.337e-02, 1.713e-01, 2.916e-01, -5.668e-01, -4.006e-01) * s0_2_1;
	r2 += M4(-4.636e-01, -1.580e-02, 9.318e-02, 2.764e-01, -1.338e-01, -1.433e-01, -1.717e-01, -6.976e-02, 1.538e-01, 1.666e-01, 2.638e-01, 2.127e-01, -3.621e-01, 1.958e-01, -3.616e-01, 1.851e-01) * s0_2_1;
	r0 += M4(4.351e-03, -1.555e-01, 8.420e-02, -2.506e-01, 6.623e-02, -3.656e-01, 1.498e-01, -3.694e-02, -3.045e-02, 1.331e-01, -8.355e-02, -7.321e-02, 2.776e-02, -2.267e-02, -3.699e-02, 1.756e-01) * s0_2_2;
	r1 += M4(-1.575e-02, 4.272e-02, 6.710e-02, 5.611e-02, -1.206e-02, 1.596e-02, 8.022e-03, 5.567e-02, 2.513e-01, -1.563e-01, -2.087e-01, 5.258e-02, -1.048e-01, -2.239e-01, -1.138e-01, -2.617e-02) * s0_2_2;
	r2 += M4(2.771e-01, -1.256e-01, 3.833e-03, -3.125e-01, 2.625e-01, 5.582e-02, 8.362e-02, -8.139e-02, 1.601e-01, 7.286e-02, 9.911e-02, -9.362e-02, -3.393e-01, -1.850e-01, -6.719e-02, 1.289e-01) * s0_2_2;
	// Contributions of the samples from G[1] (s1_*).
	r0 += M4(-2.042e-03, 1.190e-02, 9.567e-02, 6.706e-02, 6.885e-02, -1.968e-01, -1.384e-01, -1.329e-02, 2.193e-01, 4.013e-01, 1.297e-01, 5.969e-02, -3.148e-02, -2.658e-01, 2.389e-01, -4.146e-02) * s1_0_0;
	r1 += M4(8.672e-02, -3.103e-02, 4.032e-02, 1.424e-03, 7.894e-02, -1.154e-01, -2.546e-04, 1.710e-02, 4.405e-01, 1.488e-01, -3.979e-03, -2.951e-02, 3.547e-03, 8.927e-02, 1.149e-01, 9.342e-02) * s1_0_0;
	r2 += M4(2.557e-02, 2.083e-02, -8.295e-03, 1.602e-01, -4.093e-01, -2.019e-01, -1.690e-01, -6.666e-01, -3.221e-01, -2.092e-01, -4.581e-01, -1.000e+00, 6.352e-01, -3.714e-01, -6.355e-01, 1.000e+00) * s1_0_0;
	r0 += M4(3.063e-02, 9.860e-02, 3.238e-02, 2.832e-02, 2.534e-01, 1.224e-01, -1.000e+00, 1.577e-02, 1.109e-01, -7.725e-02, 1.275e-01, 2.483e-01, -2.746e-03, -1.069e-01, -1.000e+00, 1.864e-01) * s1_0_1;
	r1 += M4(2.125e-01, -3.638e-02, 2.998e-01, -4.188e-03, -9.886e-02, 4.169e-01, -1.773e-01, -4.305e-02, 9.639e-02, 1.948e-01, 2.542e-01, -1.874e-02, 2.627e-01, -2.013e-01, 2.127e-01, 4.590e-02) * s1_0_1;
	r2 += M4(3.947e-01, -3.932e-02, -1.603e-01, 1.505e-01, 4.848e-01, -2.096e-01, -1.000e+00, -1.000e+00, 1.527e-01, -8.228e-02, 1.080e-01, 2.095e-02, 1.910e-01, 9.010e-02, -2.363e-01, -1.000e+00) * s1_0_1;
	r0 += M4(-1.773e-02, -5.186e-02, -2.763e-01, 1.531e-01, 1.180e-01, 1.829e-01, -3.799e-01, 8.313e-03, 6.763e-02, 9.308e-02, 1.679e-01, -1.104e-01, 4.342e-04, -7.343e-02, 7.888e-02, -5.109e-02) * s1_0_2;
	r1 += M4(1.142e-01, 2.493e-02, 1.466e-01, 3.834e-02, 2.974e-01, -1.534e-01, 1.136e-01, 4.537e-02, 3.325e-02, -1.206e-01, 2.617e-02, 1.144e-02, 2.251e-01, -3.100e-02, -4.619e-02, -4.590e-04) * s1_0_2;
	r2 += M4(2.045e-01, -6.113e-02, -2.573e-02, 1.289e-01, -5.277e-01, -1.723e-02, -7.766e-02, 2.577e-01, 4.450e-02, -2.575e-02, 1.559e-02, -7.738e-03, 2.586e-01, -8.405e-02, -1.492e-01, -2.091e-01) * s1_0_2;
	r0 += M4(1.058e-02, -6.917e-02, 1.690e-01, 3.733e-02, -2.297e-02, -5.735e-01, 2.410e-01, -2.339e-01, 1.557e-01, 7.915e-03, 3.025e-01, 4.224e-01, 9.440e-02, 7.049e-01, -3.037e-01, -3.162e-01) * s1_1_0;
	r1 += M4(6.171e-02, -3.116e-02, 4.896e-02, 1.000e-02, 2.478e-01, 4.748e-02, 2.386e-02, -1.208e-01, 4.050e-02, 1.890e-01, -2.037e-01, 4.365e-01, -4.699e-01, -4.132e-02, -5.881e-04, -2.957e-02) * s1_1_0;
	r2 += M4(-4.065e-02, 4.362e-02, -1.099e-01, 7.949e-02, 4.846e-01, -9.951e-02, -1.735e-01, -1.905e-01, 5.299e-02, -3.220e-01, -8.262e-01, 9.669e-02, -4.306e-01, -1.032e-01, 6.238e-01, -9.125e-02) * s1_1_0;
	r0 += M4(3.234e-02, 1.062e-01, 3.387e-01, 2.842e-01, 6.323e-02, 2.231e-01, -1.759e-01, -1.000e+00, -7.039e-01, 2.366e-01, 1.090e-01, 2.792e-01, 4.647e-02, -3.577e-02, 5.236e-02, -5.830e-01) * s1_1_1;
	r1 += M4(-1.723e-01, 6.952e-02, -1.611e-01, 1.965e-02, -7.989e-01, 2.273e-01, -2.594e-01, 2.501e-01, -1.000e+00, -5.433e-01, -4.482e-01, -6.820e-01, 4.440e-02, 2.781e-02, 3.423e-01, -8.884e-02) * s1_1_1;
	r2 += M4(-3.063e-01, 1.696e-01, 5.187e-01, 1.486e-01, 5.002e-02, 2.549e-01, 2.678e-01, 2.006e-01, 2.420e-01, -2.570e-03, 2.708e-01, -3.296e-01, 3.191e-01, -3.757e-02, -3.564e-02, 7.685e-02) * s1_1_1;
	r0 += M4(-9.179e-03, 3.307e-02, -1.548e-01, 8.038e-02, 6.289e-02, 3.584e-01, -1.449e-01, -8.459e-02, -6.595e-03, -9.966e-02, -5.430e-02, 2.112e-01, 2.046e-02, 1.125e-02, 1.201e-01, 1.008e-02) * s1_1_2;
	r1 += M4(-1.340e-01, 2.400e-01, 1.647e-01, 5.811e-02, 9.323e-02, -1.186e-01, -2.780e-01, -5.552e-02, 1.549e-02, 1.868e-01, -5.761e-02, -6.174e-02, 9.121e-02, -2.000e-04, 1.107e-01, -1.388e-02) * s1_1_2;
	r2 += M4(4.521e-01, 2.698e-02, 6.005e-02, 2.799e-01, -2.427e-01, -8.176e-02, 6.714e-02, -8.496e-02, -9.372e-02, -4.206e-02, -1.517e-01, 3.236e-01, -1.088e-01, 2.036e-01, -5.438e-02, 3.833e-01) * s1_1_2;
	r0 += M4(4.759e-03, -9.776e-02, 1.504e-02, 1.292e-01, 1.235e-02, -7.505e-02, 1.720e-02, 1.966e-02, -1.300e-02, 7.253e-02, 7.974e-03, -3.174e-01, -1.716e-02, 1.193e-01, 1.455e-01, 7.411e-02) * s1_2_0;
	r1 += M4(9.881e-02, -1.569e-02, 3.311e-02, -4.659e-02, -2.356e-02, 3.229e-02, -7.217e-02, 2.766e-03, 1.326e-01, -9.366e-03, 1.656e-01, 1.706e-01, -1.133e-01, 7.411e-02, 6.064e-02, 8.748e-02) * s1_2_0;
	r2 += M4(3.310e-01, -1.968e-01, 1.438e-01, -5.860e-02, -1.652e-01, -1.445e-01, 8.344e-02, 2.265e-02, -6.894e-02, 9.153e-02, -1.070e-01, -3.748e-01, 1.359e-01, 2.795e-01, -2.174e-01, 1.793e-01) * s1_2_0;
	r0 += M4(5.993e-03, -8.303e-02, 3.322e-01, 2.269e-01, 4.831e-02, -1.504e-02, 1.893e-03, 6.771e-02, -9.910e-02, 2.459e-02, -7.096e-02, -1.118e-01, -4.339e-02, 2.429e-01, -8.031e-02, 1.694e-01) * s1_2_1;
	r1 += M4(-1.368e-01, 2.486e-01, 2.126e-01, -1.195e-01, 1.274e-02, -1.071e-02, -6.178e-02, -1.081e-01, -1.488e-02, -5.219e-02, 2.593e-01, 2.285e-01, 4.132e-02, 2.546e-02, 2.235e-02, 8.327e-02) * s1_2_1;
	r2 += M4(9.433e-01, -5.394e-02, 1.057e-01, 2.641e-02, 3.369e-01, -7.947e-02, -1.428e-01, 7.389e-02, -1.402e-01, 1.313e-01, 1.640e-01, -9.017e-02, -5.088e-01, 2.549e-01, 1.471e-01, 1.759e-01) * s1_2_1;
	r0 += M4(-5.104e-03, -8.247e-02, -1.931e-02, 8.843e-02, 1.014e-02, 3.394e-02, 1.233e-01, 1.308e-01, -1.114e-02, 4.292e-02, -1.016e-01, -1.968e-02, -2.683e-02, 7.090e-02, -1.876e-01, -1.742e-01) * s1_2_2;
	r1 += M4(2.377e-03, 1.597e-02, 4.550e-02, 6.243e-02, -2.635e-02, 4.052e-02, -1.084e-01, -5.848e-02, -1.558e-02, 3.204e-02, 5.180e-02, 7.690e-02, 7.231e-02, -6.119e-02, -4.635e-02, -7.380e-03) * s1_2_2;
	r2 += M4(3.035e-02, 8.760e-02, -1.929e-01, 2.426e-03, -1.300e-01, 2.265e-02, 2.275e-02, 1.382e-01, 1.490e-01, 5.097e-02, 9.255e-02, 1.017e-01, 1.595e-01, -1.162e-01, 1.258e-01, -3.465e-01) * s1_2_2;
	// Reuse the s0_* variables for the 3x3 neighbourhood from G[2].
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(4.117e-02, -1.205e-02, -1.285e-01, 2.791e-02, -4.543e-02, -5.104e-02, 9.435e-02, -7.815e-02, 6.567e-02, 2.922e-02, -2.608e-01, 6.870e-03, -1.215e-01, -1.469e-02, 4.499e-01, -7.901e-02) * s0_0_0;
	r1 += M4(-2.801e-02, 7.450e-02, -6.352e-02, -9.036e-03, 2.550e-02, 1.798e-02, 3.253e-02, 2.531e-02, 8.024e-02, 7.426e-03, -4.462e-02, -9.735e-04, -1.856e-01, -2.721e-02, -7.181e-02, 1.904e-02) * s0_0_0;
	r2 += M4(-6.548e-02, -1.540e-02, -1.000e-02, -9.488e-02, 5.978e-02, -1.309e-02, -1.620e-02, 1.875e-01, -2.906e-01, 5.229e-02, -1.356e-01, 4.643e-01, 1.861e-01, -7.680e-02, 2.713e-01, -7.709e-01) * s0_0_0;
	r0 += M4(4.114e-02, 6.356e-02, 5.402e-02, -3.357e-02, -6.468e-02, -7.245e-02, -1.607e-01, -2.690e-02, 1.204e-01, 1.618e-02, 1.312e-01, 5.076e-02, -1.592e-01, -7.377e-02, -5.389e-02, 9.861e-03) * s0_0_1;
	r1 += M4(-1.489e-02, 4.611e-02, -2.913e-03, -1.009e-02, -2.976e-02, -8.448e-02, 2.713e-01, 8.460e-02, 1.346e-01, 5.069e-02, 1.169e-01, -2.259e-02, -2.355e-01, -1.089e-01, -2.528e-01, -2.669e-03) * s0_0_1;
	r2 += M4(-6.115e-02, 1.150e-01, -1.744e-02, -3.205e-01, -1.005e-01, 4.403e-02, 4.564e-02, 3.414e-01, 4.414e-01, -9.512e-02, -6.532e-02, -4.578e-01, -4.077e-01, -8.361e-02, 1.040e-01, 9.268e-01) * s0_0_1;
	r0 += M4(3.230e-03, -4.779e-03, 6.367e-03, -1.075e-02, -2.609e-02, -1.293e-01, 9.081e-04, -8.300e-02, 4.473e-02, 1.057e-01, -1.472e-01, -2.212e-02, 1.324e-02, -5.324e-02, 2.359e-01, 4.956e-02) * s0_0_2;
	r1 += M4(3.479e-02, -6.678e-03, -1.540e-02, 5.464e-03, -3.705e-02, 1.242e-01, -1.492e-02, -2.420e-02, 1.676e-03, 4.341e-03, 7.074e-02, -2.828e-02, 3.502e-02, 3.913e-03, -8.461e-02, 5.126e-02) * s0_0_2;
	r2 += M4(3.928e-02, 2.133e-02, 4.819e-03, 1.254e-01, 2.426e-01, -1.400e-01, 4.962e-03, -4.666e-01, -3.995e-01, -1.635e-01, -1.830e-01, 2.431e-01, 5.016e-01, 2.629e-01, 1.467e-01, -2.308e-01) * s0_0_2;
	r0 += M4(2.393e-01, -5.122e-02, -3.501e-01, -2.480e-01, 3.686e-02, 5.751e-01, 1.862e-02, -1.489e-01, 2.715e-02, 2.426e-01, 4.423e-01, -8.178e-02, 2.397e-01, 8.922e-01, 1.153e-01, 2.725e-02) * s0_1_0;
	r1 += M4(3.560e-01, 4.482e-02, 2.733e-02, -5.086e-02, -3.007e-01, -1.117e-01, -8.550e-02, -4.093e-02, 1.935e-01, 1.764e-01, 1.827e-01, 9.349e-02, 3.524e-01, 1.052e-01, 5.420e-01, -1.058e-01) * s0_1_0;
	r2 += M4(-1.425e-01, -6.156e-01, -2.823e-01, -3.451e-01, 3.952e-01, -5.503e-01, 2.497e-01, -1.596e-01, 3.975e-03, -6.179e-02, -4.438e-01, 1.094e-01, -2.423e-01, 3.879e-01, 1.364e-01, 7.363e-01) * s0_1_0;
	r0 += M4(1.519e-01, -1.585e-02, -5.740e-01, 2.850e-02, 6.633e-03, 3.828e-02, 4.688e-01, -1.266e-01, 2.623e-01, -7.138e-02, 4.198e-02, -1.000e+00, -1.278e-01, -5.041e-02, -4.412e-01, 1.000e+00) * s0_1_1;
	r1 += M4(-2.394e-01, 1.018e-01, 1.670e-01, -9.791e-02, -1.887e-01, 8.151e-03, 1.770e-02, -1.129e-01, -2.266e-01, 6.430e-02, -2.382e-01, -3.697e-02, -1.000e+00, -3.676e-01, 2.867e-01, 5.825e-02) * s0_1_1;
	r2 += M4(1.920e-01, -3.502e-01, -8.756e-04, -1.000e+00, -4.250e-01, 1.806e-01, 1.899e-01, -5.310e-01, -5.645e-01, 3.422e-01, 5.674e-01, -1.000e+00, -4.395e-01, 9.441e-02, 6.948e-02, -1.000e+00) * s0_1_1;
	r0 += M4(6.479e-02, 5.590e-02, -1.404e-01, 1.607e-02, 7.286e-02, 1.395e-01, 7.206e-02, -5.736e-02, 4.757e-02, 8.907e-02, -5.527e-01, -2.072e-01, -1.352e-01, -2.820e-01, 3.703e-01, 1.000e+00) * s0_1_2;
	r1 += M4(-2.227e-02, -1.301e-01, 8.469e-02, -2.320e-02, 2.679e-02, 8.283e-03, 3.547e-02, 3.664e-02, -1.369e-01, -1.227e-01, 1.907e-01, -1.191e-02, 6.898e-02, 1.784e-01, -8.099e-01, -4.646e-02) * s0_1_2;
	r2 += M4(-1.436e-01, 1.850e-02, 6.614e-02, 7.246e-02, -3.679e-01, 3.483e-01, 1.209e-01, 5.382e-01, 2.035e-01, -2.415e-02, 1.755e-01, -1.838e-01, 2.609e-01, -2.539e-01, -1.738e-01, 1.284e-01) * s0_1_2;
	r0 += M4(2.642e-02, 5.454e-02, 3.764e-01, -1.000e+00, -5.615e-02, 4.648e-01, -2.135e-01, 5.942e-02, 3.686e-04, -2.551e-01, 5.907e-02, 9.651e-02, -1.012e-02, -5.820e-01, 5.914e-01, 2.723e-01) * s0_2_0;
	r1 += M4(-5.665e-01, 2.021e-01, -4.094e-01, 3.330e-01, -7.857e-02, -5.703e-02, -5.133e-01, -1.243e-01, -1.359e-01, -1.063e-02, 2.488e-02, 2.602e-01, 5.221e-01, 2.697e-01, 2.496e-01, 3.774e-01) * s0_2_0;
	r2 += M4(-1.628e-01, 5.751e-01, 3.185e-02, -2.771e-01, 2.080e-02, 6.580e-01, 9.202e-02, -8.391e-02, -8.735e-01, -2.705e-01, -4.297e-02, -2.309e-01, 1.000e+00, -1.127e-02, -3.780e-01, 1.000e+00) * s0_2_0;
	r0 += M4(1.157e-01, 1.582e-02, -1.000e+00, -2.213e-01, -3.081e-02, -2.842e-01, -6.672e-01, 4.057e-01, -1.101e-01, -1.000e+00, 4.639e-01, 6.485e-02, 1.189e-01, -8.161e-01, -5.147e-01, -3.872e-01) * s0_2_1;
	r1 += M4(-3.360e-01, 7.520e-02, 3.467e-01, -1.359e-01, 2.129e-01, 1.052e-02, 1.178e-01, -7.143e-03, -1.958e-01, 1.420e-01, -3.535e-01, 3.577e-01, 6.621e-01, -6.169e-02, -5.910e-02, -1.918e-01) * s0_2_1;
	r2 += M4(1.011e-01, 2.830e-01, 4.051e-01, -1.000e+00, -1.941e-03, -1.625e-01, -2.533e-01, -3.571e-02, -1.000e+00, 8.883e-02, 7.963e-02, -1.000e+00, 5.383e-01, -3.216e-01, 1.699e-01, -5.780e-01) * s0_2_1;
	r0 += M4(3.233e-02, 3.211e-02, -2.787e-01, 1.397e-01, -6.976e-02, -4.121e-01, 6.612e-01, -2.227e-01, -4.879e-02, -1.152e-01, -3.456e-01, 8.147e-02, 2.417e-01, 4.823e-01, -1.814e-01, -1.230e-01) * s0_2_2;
	r1 += M4(-1.460e-01, -3.657e-02, 1.111e-01, -3.317e-02, 3.366e-01, 3.424e-01, 9.509e-02, 7.844e-02, -1.655e-01, -4.939e-02, 1.263e-01, 3.805e-02, -4.562e-01, -4.805e-01, 4.223e-01, -1.631e-01) * s0_2_2;
	r2 += M4(-6.195e-02, 3.160e-02, 4.248e-02, 1.825e-01, 1.240e-01, -1.976e-01, -2.388e-01, -1.979e-01, 1.765e-01, 8.364e-02, 8.073e-02, -4.421e-02, -7.488e-01, -2.500e-01, 2.726e-01, -4.861e-01) * s0_2_2;
	// Clamp each accumulator to [0, 1] and store the three results side by
	// side at opos + (0..2, 0).
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_4x12_SOFT] -conv2
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND conv1
//!BIND LUMA
//!SAVE conv2
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Pass "-conv2": reads the texture "conv1" (LUMA is bound as well and
// supplies LUMA_size) and saves a 4-component texture named "conv2", three
// times as wide as LUMA and of the same height.
// The WHEN expression is written in reverse Polish notation: the pass only
// runs when OUTPUT.w > 1.2 * LUMA.w and OUTPUT.h > 1.2 * LUMA.h.
// NOTE: keep the directive lines above contiguous; do not insert ordinary
// comments between them.

// Prefer 16-bit float types when the extension is available and fall back
// to 32-bit types otherwise.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): the three horizontally adjacent texels of "conv1" that
// belong to the LUMA-space pixel pos + (x, y). The pixel coordinate is
// clamped to [0, sz] first, then scaled by (3, 1) and offset by 0, 1 or 2
// columns. Each result is scaled by conv1_mul and converted to V4. The
// macros expect `pos` and `sz` to be in scope where they are expanded.
#define l0(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tiles, one 10x10 tile per input texel column (l0, l1, l2):
// the 8x8 pixels handled by the workgroup plus a one-texel border.
shared V4 G[3][10][10];
void hook() {
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	r0 += M4(3.167e-02, -3.010e-02, 4.620e-02, -1.740e-01, -5.176e-02, 4.510e-02, -2.072e-02, -2.836e-02, 2.024e-02, 4.099e-02, 1.778e-02, 6.186e-02, 1.424e-02, 2.708e-02, 8.500e-02, -8.991e-02) * s0_0_0;
	r1 += M4(-1.273e-01, 9.003e-02, 2.881e-01, 4.616e-02, -1.709e-01, 1.448e-01, -4.914e-02, -2.762e-02, -1.859e-01, 3.250e-01, -1.401e-01, -7.132e-02, 1.324e-02, 6.849e-02, 1.566e-02, 2.128e-01) * s0_0_0;
	r2 += M4(3.381e-03, 9.564e-02, -2.019e-02, -2.109e-01, 2.085e-02, -8.293e-02, 1.353e-01, -7.704e-01, 5.436e-02, 1.107e-01, 5.148e-02, -6.974e-01, 1.288e-01, 3.902e-02, 9.368e-02, 3.834e-01) * s0_0_0;
	r0 += M4(3.839e-03, 4.237e-02, 6.157e-02, -2.028e-03, 1.547e-03, 7.615e-02, 2.333e-02, 5.232e-02, 2.292e-05, -5.169e-03, 4.805e-02, 1.987e-01, 6.869e-02, -5.676e-02, -4.610e-02, -5.068e-01) * s0_0_1;
	r1 += M4(-3.171e-02, -3.025e-01, -1.546e-02, 2.194e-01, 8.527e-04, 2.864e-01, -1.161e-01, 1.927e-02, -4.457e-01, 1.107e-02, -1.220e-01, -1.719e-01, 2.104e-01, 1.981e-01, 7.025e-02, 1.278e-01) * s0_0_1;
	r2 += M4(-6.267e-02, -6.386e-02, 4.051e-01, 1.578e-01, 3.790e-01, 2.276e-01, -4.359e-01, 3.284e-01, -4.545e-01, 6.802e-02, 9.036e-02, -1.000e+00, 3.346e-01, 2.021e-01, 9.963e-02, 3.546e-02) * s0_0_1;
	r0 += M4(-2.683e-02, 2.193e-02, -8.852e-02, -1.672e-02, 8.619e-02, 2.509e-02, 5.409e-02, 1.455e-01, 2.019e-02, -2.431e-02, -2.801e-02, -9.647e-02, 9.768e-02, 6.563e-03, 1.400e-02, 4.328e-02) * s0_0_2;
	r1 += M4(5.379e-03, -3.510e-01, 6.164e-02, -1.151e-01, 1.978e-02, -2.405e-01, 9.584e-03, -8.039e-02, -1.409e-01, -6.255e-02, 1.245e-01, -1.994e-01, 2.317e-02, 1.596e-01, 7.168e-02, -2.276e-02) * s0_0_2;
	r2 += M4(-6.968e-02, 4.916e-02, -4.435e-02, -4.491e-01, 2.179e-02, -2.271e-02, -1.136e-01, 2.741e-01, -4.801e-02, 3.297e-03, -4.824e-02, 6.398e-02, 1.720e-01, 2.856e-02, 1.034e-01, 1.292e-01) * s0_0_2;
	r0 += M4(-1.324e-02, -6.892e-02, 7.056e-01, -4.526e-01, 8.674e-02, 1.032e-04, 1.252e-01, 3.765e-02, -1.894e-02, -1.511e-02, -3.385e-01, 1.520e-03, -1.441e-02, -5.161e-03, 1.497e-01, -4.850e-01) * s0_1_0;
	r1 += M4(2.044e-01, -1.749e-01, 5.316e-01, 8.974e-02, -1.659e-01, -1.129e-01, 5.697e-02, -8.328e-02, -2.095e-01, -4.170e-01, 3.176e-02, 6.658e-03, 2.381e-01, 3.278e-01, 1.066e-02, 6.832e-02) * s0_1_0;
	r2 += M4(-3.346e-01, -4.796e-01, -2.062e-01, -2.351e-01, -1.515e-01, -6.455e-02, -6.892e-02, -6.749e-01, 1.421e-01, 9.245e-02, 1.775e-02, 2.915e-01, 1.877e-01, -4.582e-02, 2.814e-02, 1.053e-01) * s0_1_0;
	r0 += M4(-6.308e-01, -2.407e-01, -3.665e-01, 5.349e-01, -9.677e-02, 8.897e-02, 3.694e-01, -2.776e-01, 5.841e-02, -6.608e-02, -1.062e-01, 5.206e-02, -4.121e-01, -3.149e-01, -3.591e-01, -2.505e-01) * s0_1_1;
	r1 += M4(-1.559e-01, -5.845e-01, -8.796e-01, -4.958e-01, -2.908e-01, 3.178e-01, 1.116e-01, -1.674e-01, -2.812e-01, -4.488e-02, -6.470e-03, 4.859e-02, -4.505e-01, -1.000e+00, -7.350e-01, -1.000e+00) * s0_1_1;
	r2 += M4(-3.877e-01, -6.126e-02, -5.840e-01, 2.907e-01, -2.272e-01, -1.466e-01, -1.045e-01, 3.375e-01, 1.582e-01, -8.544e-02, -8.133e-02, 6.270e-01, 3.068e-01, 3.113e-01, -2.094e-01, -5.155e-01) * s0_1_1;
	r0 += M4(2.973e-01, -2.533e-02, -1.375e-01, -6.515e-02, -3.021e-01, -6.316e-03, 4.420e-02, -2.350e-01, 1.452e-01, 8.152e-02, -1.624e-02, -1.048e-01, 4.099e-02, 4.742e-02, -1.530e-02, 1.441e-01) * s0_1_2;
	r1 += M4(-1.754e-01, -3.919e-01, 9.371e-02, -3.848e-02, 1.159e-01, 7.013e-02, -3.491e-02, -1.805e-01, -4.083e-02, -1.056e-01, 2.923e-02, -1.687e-01, -7.171e-02, 2.735e-01, 9.076e-02, 6.078e-02) * s0_1_2;
	r2 += M4(-7.377e-02, 1.065e-01, -1.538e-01, -3.842e-01, 2.560e-01, -1.512e-01, 1.476e-02, 2.433e-01, 1.048e-01, 8.156e-02, 3.451e-02, 1.058e-01, 1.079e-01, -2.733e-02, 1.580e-02, -1.054e-01) * s0_1_2;
	r0 += M4(2.392e-02, 6.401e-02, -1.977e-02, -4.206e-02, -3.313e-02, 4.178e-02, -9.878e-02, -5.598e-02, 6.393e-03, 2.581e-02, -1.157e-01, 2.438e-02, -1.547e-03, 1.255e-02, 1.248e-01, 2.407e-03) * s0_2_0;
	r1 += M4(-1.244e-01, -2.804e-03, 6.523e-02, -4.014e-02, 2.197e-03, 3.734e-02, 6.154e-02, -1.030e-01, -5.451e-02, 5.056e-02, 4.134e-02, -6.190e-02, 8.729e-02, 4.207e-02, 3.038e-03, 1.071e-01) * s0_2_0;
	r2 += M4(3.574e-02, -2.377e-01, 1.933e-02, -1.210e-01, 9.079e-02, 1.273e-02, 2.936e-02, 3.068e-01, 5.249e-02, -1.136e-01, -1.342e-02, 1.971e-01, 7.540e-02, -4.954e-02, -1.205e-02, 7.866e-02) * s0_2_0;
	r0 += M4(-7.043e-02, 4.785e-01, -9.949e-02, -1.894e-01, 1.024e-01, -1.999e-03, 6.874e-02, -3.471e-01, 8.797e-04, -9.735e-02, -1.303e-01, -6.982e-02, 1.931e-02, 8.008e-02, 4.428e-02, 9.956e-02) * s0_2_1;
	r1 += M4(7.156e-02, 1.303e-01, 1.397e-01, 5.630e-02, -3.802e-02, 7.998e-03, -6.287e-03, -9.478e-02, -1.832e-02, 1.661e-02, -1.737e-02, -4.362e-01, 2.497e-02, 6.862e-02, 1.424e-02, 1.461e-01) * s0_2_1;
	r2 += M4(4.660e-02, -2.623e-01, 3.592e-02, -7.349e-02, 1.013e-02, -3.282e-02, 1.980e-02, -1.758e-01, 3.803e-02, -1.252e-01, 4.723e-02, -6.289e-02, 4.701e-02, 2.080e-02, -1.319e-02, -3.436e-01) * s0_2_1;
	r0 += M4(1.476e-01, 4.327e-02, -6.801e-02, -1.241e-01, 1.228e-01, 8.826e-02, 1.892e-02, -1.947e-01, 3.322e-02, 7.367e-02, 7.080e-03, -3.296e-01, 6.668e-02, 8.711e-02, 2.191e-02, 7.797e-02) * s0_2_2;
	r1 += M4(-9.774e-02, -1.125e-01, 7.773e-02, -3.031e-01, -1.503e-01, 5.060e-02, -8.573e-02, -4.929e-01, -1.119e-01, 1.066e-01, -7.035e-02, -5.584e-01, 7.653e-02, 5.999e-02, 4.750e-02, 9.319e-02) * s0_2_2;
	r2 += M4(1.282e-02, 6.050e-02, 1.441e-02, -3.544e-02, -1.986e-02, -4.759e-02, 2.364e-02, 3.287e-01, -6.679e-03, 4.563e-03, 4.551e-02, 1.858e-01, 4.228e-02, 2.634e-02, 2.600e-02, -4.906e-01) * s0_2_2;
	r0 += M4(-2.277e-02, -1.081e-02, -2.352e-02, 1.222e-01, 3.432e-03, 3.104e-02, 9.342e-03, 1.086e-01, -4.719e-03, -1.959e-02, 5.383e-02, 8.896e-02, -2.223e-02, 2.236e-02, -1.363e-01, 6.927e-02) * s1_0_0;
	r1 += M4(8.902e-02, -1.515e-01, -8.153e-02, -8.083e-02, 6.323e-02, -1.281e-02, -2.460e-01, 4.026e-02, -7.602e-02, 1.561e-01, 2.681e-02, -2.499e-02, -2.900e-01, -2.186e-01, -1.512e-02, -1.343e-01) * s1_0_0;
	r2 += M4(-5.459e-02, -6.414e-02, 5.803e-03, -6.117e-02, -2.906e-02, 1.734e-02, 4.145e-02, 2.520e-02, 2.008e-02, 1.589e-01, 1.120e-02, 2.360e-01, -5.559e-02, 6.951e-02, 7.499e-02, 1.912e-01) * s1_0_0;
	r0 += M4(-1.725e-02, 3.483e-02, -9.599e-02, -1.070e-01, 4.704e-03, 2.494e-02, -1.233e-01, 6.965e-02, 4.220e-03, -1.652e-02, 1.505e-02, -5.900e-02, 2.513e-02, -5.048e-02, -2.194e-01, -2.244e-01) * s1_0_1;
	r1 += M4(-7.635e-02, 1.751e-01, -2.368e-02, -9.279e-02, 1.636e-01, -3.361e-01, -1.160e-01, -1.733e-02, 1.385e-01, 3.708e-01, 9.685e-03, 1.825e-02, 2.607e-01, 3.529e-02, 3.166e-01, 1.590e-01) * s1_0_1;
	r2 += M4(5.587e-03, -1.577e-01, -2.227e-01, -5.748e-02, 1.716e-01, 1.675e-01, -1.237e-01, 1.010e-01, 1.261e-01, -1.365e-02, 1.217e-01, 2.106e-02, -4.176e-01, 1.676e-01, 6.037e-01, 9.916e-02) * s1_0_1;
	r0 += M4(6.248e-02, -5.976e-02, 6.392e-02, 5.381e-02, 5.023e-03, -2.689e-02, 3.349e-02, 7.537e-02, 8.367e-04, -1.763e-02, 4.187e-02, 8.255e-04, 7.929e-02, 2.021e-02, 6.159e-02, 5.005e-02) * s1_0_2;
	r1 += M4(-1.325e-02, 3.688e-01, -5.328e-02, 1.048e-01, 1.909e-01, -1.186e-02, -6.767e-02, 5.286e-03, -2.919e-02, 3.048e-01, -5.115e-02, 4.326e-02, -1.491e-01, 8.067e-02, 2.941e-02, 1.674e-01) * s1_0_2;
	r2 += M4(1.592e-02, 6.845e-02, -3.208e-02, 2.349e-02, 6.318e-02, 5.986e-02, -5.600e-03, 5.992e-01, 8.719e-02, -4.747e-02, 7.985e-02, 3.548e-01, -1.559e-01, -3.005e-01, 1.392e-01, -8.055e-02) * s1_0_2;
	r0 += M4(2.424e-02, 4.325e-02, -2.250e-01, 2.077e-02, 6.496e-02, 6.361e-02, -2.313e-01, 3.119e-01, 2.159e-02, 1.316e-02, 1.853e-01, -1.588e-01, -3.041e-02, -1.022e-01, 1.063e-01, -1.001e-01) * s1_1_0;
	r1 += M4(2.337e-02, 1.236e-01, -2.338e-02, 7.315e-02, 1.537e-01, 3.473e-01, -2.036e-01, 3.293e-01, 2.646e-01, 1.878e-01, -3.984e-02, 2.555e-01, -1.175e-01, 5.181e-02, -1.383e-02, -4.601e-02) * s1_1_0;
	r2 += M4(3.519e-01, 2.395e-02, -8.580e-02, 1.248e-02, -1.668e-01, 7.578e-02, 8.538e-02, 2.456e-01, -4.480e-02, 3.460e-02, -2.148e-02, 1.061e-01, -6.671e-02, 2.414e-02, 3.012e-03, -1.007e-01) * s1_1_0;
	r0 += M4(1.179e-01, 1.654e-01, -1.352e-01, -2.328e-01, 2.750e-01, 1.157e-02, 1.352e-01, 4.549e-01, 3.538e-02, -1.654e-01, 1.767e-01, -1.910e-01, -9.207e-02, -8.734e-01, -2.591e-01, -1.420e-01) * s1_1_1;
	r1 += M4(-3.039e-01, -5.253e-01, 1.691e-01, -8.893e-02, -6.452e-02, -5.860e-02, -4.171e-01, 3.038e-01, 8.089e-02, -7.351e-02, 3.639e-01, -2.731e-01, 1.333e-01, -2.125e-01, -4.458e-01, -3.120e-01) * s1_1_1;
	r2 += M4(2.708e-01, 5.610e-01, -4.237e-01, -1.164e-01, -4.545e-01, -1.180e-01, -9.997e-02, 5.719e-02, 3.053e-01, 1.276e-01, 1.141e-03, -1.812e-01, 2.042e-01, -4.725e-01, -1.526e-02, -2.436e-01) * s1_1_1;
	r0 += M4(2.625e-01, 3.235e-02, -5.304e-03, 1.088e-01, -1.665e-01, -1.636e-02, -2.624e-02, -3.884e-02, -2.007e-01, -1.499e-02, 1.096e-01, 3.359e-02, 3.033e-01, -1.403e-01, -2.904e-02, -3.258e-02) * s1_1_2;
	r1 += M4(1.449e-01, 3.371e-01, -7.966e-02, 4.148e-01, 1.551e-01, -1.048e-01, -1.159e-01, -3.452e-02, 1.052e-01, 1.870e-01, -5.102e-03, 8.368e-02, -1.417e-01, -1.545e-01, 1.767e-01, -2.170e-01) * s1_1_2;
	r2 += M4(1.382e-01, 1.450e-01, 6.303e-02, -1.768e-01, 1.766e-01, -1.717e-01, -4.646e-03, 3.214e-01, 1.939e-02, -1.030e-01, 6.324e-02, 2.705e-01, -1.527e-01, 1.822e-01, -7.370e-02, -1.986e-01) * s1_1_2;
	r0 += M4(1.252e-02, 1.217e-01, 3.986e-02, 9.519e-02, -3.865e-02, -1.311e-01, 2.961e-02, -5.611e-02, -2.129e-02, 1.473e-02, 3.877e-04, -1.239e-03, 2.118e-02, 1.974e-02, 2.526e-02, 3.990e-02) * s1_2_0;
	r1 += M4(8.690e-02, 4.694e-02, -3.599e-03, 6.496e-02, 1.352e-01, -1.211e-01, 5.748e-02, -1.744e-01, -8.332e-02, -4.411e-02, 9.918e-02, -1.199e-01, -4.353e-02, 6.284e-02, -9.271e-03, -2.653e-03) * s1_2_0;
	r2 += M4(1.571e-02, 2.005e-01, 3.361e-02, -1.430e-01, 7.552e-02, 1.109e-02, -1.703e-02, 2.083e-01, 7.846e-03, 1.773e-01, 1.516e-02, -8.421e-03, -9.737e-02, 8.948e-02, -1.012e-02, -4.947e-02) * s1_2_0;
	r0 += M4(6.851e-02, 3.445e-01, 5.350e-02, 1.104e-01, -3.396e-02, -6.301e-02, 1.002e-01, -5.045e-02, -1.140e-01, -1.567e-03, 5.679e-02, 3.263e-02, -2.317e-03, 7.882e-02, -9.755e-03, 1.676e-02) * s1_2_1;
	r1 += M4(8.622e-02, 3.272e-01, 6.923e-02, 3.896e-01, 9.434e-02, 9.556e-03, -1.212e-02, -9.750e-02, -1.532e-01, 1.466e-01, 1.298e-01, 3.761e-03, -2.896e-02, -5.690e-03, 3.972e-02, -5.504e-02) * s1_2_1;
	r2 += M4(-2.524e-02, 2.174e-01, 1.050e-01, -3.774e-01, -7.521e-03, -4.342e-02, -2.840e-02, -1.486e-01, -1.521e-01, 1.597e-01, -2.901e-02, -8.437e-02, 7.856e-02, -1.940e-01, 7.775e-03, -7.562e-02) * s1_2_1;
	r0 += M4(9.519e-02, 1.499e-01, 4.641e-02, 2.047e-01, -8.288e-02, 7.987e-03, -3.507e-02, 9.466e-02, -6.544e-02, 3.611e-03, 4.430e-02, 9.655e-02, 6.144e-02, 3.578e-02, -1.457e-02, -5.681e-02) * s1_2_2;
	r1 += M4(8.164e-02, 2.307e-01, -8.586e-02, 7.206e-02, 9.401e-02, -9.941e-02, -4.716e-02, 1.746e-01, 1.064e-01, 9.039e-02, -8.671e-02, 1.833e-01, -2.044e-02, 1.642e-02, 1.007e-01, -8.117e-02) * s1_2_2;
	r2 += M4(6.632e-02, -3.165e-02, 2.095e-02, 1.360e-01, 5.991e-02, -1.057e-01, -2.342e-02, -5.365e-02, -1.989e-02, 1.080e-02, -1.882e-02, -5.245e-02, -7.987e-03, 1.879e-03, -2.661e-04, -7.147e-02) * s1_2_2;
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(-1.826e-02, 2.340e-02, -2.582e-02, 1.759e-01, 2.257e-02, 6.809e-04, -5.343e-02, 3.468e-02, 1.621e-03, -7.020e-03, 1.282e-02, -3.768e-02, -8.343e-03, -4.972e-03, 8.671e-03, 4.183e-02) * s0_0_0;
	r1 += M4(-1.021e-01, 1.960e-01, 2.101e-02, -1.442e-01, -1.745e-01, 5.268e-02, -5.919e-02, -1.029e-01, 9.804e-02, -2.333e-03, 7.358e-02, 4.641e-02, 7.889e-02, -5.341e-02, 1.826e-02, -2.387e-02) * s0_0_0;
	r2 += M4(-1.396e-01, 8.044e-02, 1.255e-01, -5.185e-01, -7.862e-02, -1.759e-02, -1.863e-02, -4.296e-01, 1.042e-01, 6.674e-03, -1.199e-02, -3.230e-02, -2.422e-02, 2.318e-02, 5.888e-02, -3.283e-01) * s0_0_0;
	r0 += M4(-2.343e-02, -2.157e-02, 6.753e-02, 5.134e-02, 1.156e-01, 5.830e-02, -2.485e-02, -1.195e-01, 4.868e-03, -7.118e-02, 7.851e-02, 1.153e-01, 3.894e-02, 2.539e-02, 5.279e-03, 3.506e-02) * s0_0_1;
	r1 += M4(4.204e-02, 3.397e-01, 1.475e-01, -1.025e-01, -2.158e-01, 1.462e-01, 3.693e-02, 5.502e-02, 1.481e-01, 5.778e-02, -4.916e-02, 5.641e-02, 4.713e-02, -1.255e-02, 3.624e-02, -2.011e-02) * s0_0_1;
	r2 += M4(1.020e-02, 1.245e-01, 1.286e-02, 1.673e-01, 2.387e-01, 2.184e-01, 9.298e-02, -1.126e-01, -1.891e-01, -5.095e-02, -3.102e-02, -3.896e-01, 1.061e-02, 4.445e-02, -1.065e-01, -2.938e-01) * s0_0_1;
	r0 += M4(3.355e-02, 4.565e-02, 2.224e-02, 2.484e-02, 7.986e-02, 4.652e-02, -9.588e-03, -1.218e-01, -1.281e-01, -1.272e-02, -1.220e-02, 1.872e-01, 2.500e-02, 1.942e-02, 4.489e-04, 4.921e-02) * s0_0_2;
	r1 += M4(-1.841e-01, 1.359e-01, 4.896e-02, -2.590e-02, -1.500e-01, 7.364e-02, -3.033e-02, -5.821e-03, 1.675e-01, -1.601e-01, -1.036e-01, 1.579e-01, -1.307e-03, -2.972e-02, -1.149e-02, 5.481e-02) * s0_0_2;
	r2 += M4(-1.200e-01, 1.028e-01, -3.204e-02, -4.179e-01, -4.709e-02, 3.871e-02, 4.360e-02, -5.725e-01, 1.047e-01, 8.730e-02, 8.596e-04, -5.840e-02, -9.046e-02, 6.880e-02, 6.607e-03, 2.945e-02) * s0_0_2;
	r0 += M4(3.344e-02, 2.132e-02, 1.907e-02, 2.701e-03, -2.058e-02, 3.827e-03, -1.887e-02, -2.826e-01, -4.147e-02, 7.845e-03, 6.694e-02, 2.321e-01, 3.030e-02, 4.367e-03, -1.188e-01, 6.290e-02) * s0_1_0;
	r1 += M4(-2.015e-01, 1.463e-01, 5.197e-02, -1.775e-01, -2.493e-01, -2.494e-02, 4.684e-02, -1.911e-01, 1.548e-01, 4.976e-03, -4.497e-02, 1.921e-01, 4.284e-02, -2.650e-02, 1.225e-01, 1.274e-02) * s0_1_0;
	r2 += M4(-4.358e-02, 2.749e-01, -2.109e-02, -5.186e-01, 2.541e-02, 1.003e-01, -3.937e-02, -6.840e-01, -1.111e-01, -9.628e-04, 1.197e-01, -1.101e-01, -4.286e-02, -5.026e-02, 6.897e-02, -1.000e+00) * s0_1_0;
	r0 += M4(4.231e-04, 7.332e-02, 3.655e-02, -1.683e-01, 6.619e-02, 1.050e-01, 1.072e-01, -5.275e-01, -2.982e-02, -4.534e-02, 1.173e-03, 6.890e-02, 9.502e-02, 1.130e-01, -8.081e-02, -1.718e-01) * s0_1_1;
	r1 += M4(-6.791e-02, -1.780e-01, 1.939e-01, -2.374e-01, -1.516e-01, 2.473e-01, 2.114e-02, -1.600e-01, 5.038e-01, 1.451e-01, -2.402e-01, 2.433e-01, 3.980e-02, 8.054e-02, 3.406e-01, 1.847e-01) * s0_1_1;
	r2 += M4(1.686e-01, 2.159e-01, -1.875e-02, 4.706e-02, -2.602e-01, 2.580e-01, 2.426e-01, -2.421e-02, 2.915e-01, 3.936e-01, 7.828e-02, 1.047e-01, -1.691e-01, 2.885e-01, -3.991e-02, -1.736e-01) * s0_1_1;
	r0 += M4(8.368e-02, 5.240e-02, 4.318e-02, 7.681e-02, 5.653e-02, -2.089e-02, -1.067e-02, 4.738e-04, -6.842e-02, 5.084e-02, 2.968e-02, 1.044e-01, 1.817e-02, 2.777e-02, 3.673e-02, -5.401e-02) * s0_1_2;
	r1 += M4(-1.111e-01, 3.953e-02, 1.796e-01, -1.155e-01, -2.848e-01, 5.119e-02, 4.759e-02, -1.104e-01, 3.280e-01, -1.943e-01, -2.418e-01, 4.674e-02, 3.152e-02, 6.568e-02, 1.044e-01, 6.399e-02) * s0_1_2;
	r2 += M4(-8.556e-02, 3.766e-03, -3.694e-02, -1.803e-01, 6.661e-02, 1.455e-01, -6.448e-04, -3.034e-01, 2.685e-01, -1.268e-01, 1.113e-01, 5.461e-01, -4.076e-02, -5.352e-02, -5.977e-03, -1.487e-02) * s0_1_2;
	r0 += M4(2.363e-02, 5.011e-02, 9.956e-03, -3.267e-02, 8.968e-02, 5.160e-02, 7.783e-02, -1.030e-01, -3.047e-02, -1.500e-01, 3.143e-02, 2.357e-01, 1.689e-02, -8.849e-03, -1.071e-01, -2.523e-02) * s0_2_0;
	r1 += M4(-1.294e-01, 3.981e-02, 2.223e-02, -4.019e-02, -1.081e-01, 4.101e-01, 3.896e-02, 1.376e-01, 8.056e-02, -1.382e-01, -4.601e-02, 8.736e-02, -1.117e-01, 2.452e-02, 8.726e-02, -1.308e-02) * s0_2_0;
	r2 += M4(-6.287e-02, 8.529e-02, 2.204e-02, -4.919e-02, -6.383e-02, -6.544e-04, 1.950e-02, -1.230e-02, -1.098e-02, 7.290e-02, -2.149e-03, -3.573e-01, -1.451e-02, 6.320e-02, 3.426e-02, -1.377e-01) * s0_2_0;
	r0 += M4(7.200e-02, 1.881e-01, 3.033e-02, 5.348e-02, 5.168e-02, -4.382e-02, 3.790e-02, -5.033e-02, -1.136e-01, -3.301e-01, 8.629e-02, 1.615e-02, 6.427e-02, 5.829e-03, 2.494e-02, -2.265e-02) * s0_2_1;
	r1 += M4(-9.671e-02, 9.236e-02, 4.070e-02, -2.800e-02, 6.402e-04, 2.802e-02, -1.259e-01, -3.914e-01, 2.051e-01, 4.155e-02, -8.844e-02, 2.140e-01, 4.362e-02, 1.021e-01, 2.209e-01, 4.450e-02) * s0_2_1;
	r2 += M4(-5.959e-02, 6.276e-02, 2.806e-02, -5.730e-03, 1.016e-01, 6.908e-02, -3.901e-02, 6.500e-01, 1.372e-01, -1.158e-01, 7.059e-02, 4.162e-02, 6.567e-02, 1.230e-01, 4.302e-02, -1.721e-01) * s0_2_1;
	r0 += M4(1.638e-02, -4.559e-03, 7.553e-03, 2.527e-03, 2.256e-02, -3.345e-03, 3.607e-02, -1.039e-01, -4.982e-02, -3.311e-02, -1.232e-02, 1.084e-02, 3.964e-02, 2.031e-02, 9.050e-03, 8.474e-02) * s0_2_2;
	r1 += M4(2.022e-02, 1.890e-01, 1.216e-01, -2.168e-01, -1.208e-01, 8.620e-02, -1.262e-01, -2.121e-01, 1.942e-01, -4.112e-01, -1.024e-01, 3.174e-01, 7.359e-02, 6.749e-02, 7.597e-02, -1.024e-02) * s0_2_2;
	r2 += M4(2.497e-02, 1.124e-01, -2.005e-03, -2.638e-01, 4.401e-02, 5.899e-02, 2.453e-02, 2.604e-01, 6.928e-02, -2.884e-01, 1.728e-02, 2.061e-01, 4.146e-03, 7.929e-04, 2.855e-02, -9.576e-02) * s0_2_2;
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_4x12_SOFT] -conv3
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND conv2
//!BIND LUMA
//!SAVE conv3
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Numeric types for this pass: half precision when the explicit float16
// extension is available, otherwise the regular 32-bit types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): fetch one texel of the bound conv2 texture as a V4, scaled
// by conv2_mul. The coordinate pos + (x, y) is clamped to [0, sz], so taps
// outside that range reuse the nearest edge position. The clamped x is then
// multiplied by 3 and offset by 0, 1 or 2, i.e. each position owns three
// horizontally adjacent texels and lN selects the N-th of them.
// The macros expand in place and rely on `pos` and `sz` being in scope.
#define l0(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared input tile: 3 planes (one per lN macro) of 10x10 V4
// entries, indexed G[plane][row][column]. hook() fills it with offsets
// starting at (-1, -1) and reads rows/columns +0..+2 per invocation.
shared V4 G[3][10][10];
// Entry point of the conv3 pass.
// Each invocation loads part of a 10x10x3 input tile from conv2 into shared
// memory, then accumulates 27 taps (3 planes x 3x3 neighbourhood) through
// constant 4x4 weight matrices into three V4 results, clamps them to [0, 1]
// and stores them to three horizontally adjacent texels of out_image.
// NOTE(review): out_image, conv2_raw/conv2_mul and LUMA_size are not declared
// in this file; presumably they are supplied by the host (mpv) - confirm.
void hook() {
	// xy: position inside the workgroup; pos: position in the input grid
	// (workgroup id * 8 + local id).
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// Output x is tripled because three texels are written per position.
	ivec2 opos = pos * ivec2(3, 1);
	// Largest coordinate used as the upper clamp bound inside l0/l1/l2.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the shared tile. Each invocation writes entry (xy + {0, 8}) in each
	// dimension, skipping indices >= 10. The (x - 1, y - 1) offset puts the
	// tile origin one position up and to the left of the workgroup origin.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// All tile writes must be finished before any invocation reads G.
	barrier();
	// sP_R_C holds the tap from plane P at tile row xy.y + R, column xy.x + C.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	// r0, r1, r2: the three output accumulators, started at zero.
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	// Read the 3x3 neighbourhoods of planes 0 and 1.
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 taps: every tap contributes one matrix-vector product to each
	// of r0, r1 and r2. The constants are generated weights; do not reorder
	// the statements, since floating-point accumulation is order-dependent.
	r0 += M4(1.068e-03, 1.116e-01, 4.866e-02, 6.661e-02, -4.108e-02, 1.304e-02, 7.411e-02, 5.245e-05, -1.321e-02, 4.580e-02, 1.546e-02, -1.718e-02, 3.903e-03, -1.080e-02, -5.076e-02, -3.844e-02) * s0_0_0;
	r1 += M4(4.839e-02, 4.499e-02, 7.155e-02, 8.717e-02, 1.057e-01, 8.489e-02, 7.195e-02, 2.820e-02, -1.569e-03, 1.439e-02, 3.746e-02, 6.754e-03, 2.115e-02, -8.890e-03, -3.597e-02, 1.379e-02) * s0_0_0;
	r2 += M4(-1.042e-01, 2.221e-01, 7.816e-02, -1.151e-01, -3.037e-02, -5.905e-02, 3.384e-02, -2.185e-02, -1.535e-03, 3.738e-02, -2.698e-02, -1.927e-02, 2.365e-02, 1.462e-02, -4.327e-02, 6.176e-02) * s0_0_0;
	r0 += M4(3.314e-02, 5.071e-02, -9.291e-02, 4.362e-02, 2.431e-01, 1.538e-01, -3.721e-01, 1.465e-02, 2.010e-01, 8.551e-02, -8.788e-02, 1.011e-02, 2.707e-02, -4.592e-03, -2.117e-02, -7.530e-02) * s0_0_1;
	r1 += M4(2.193e-02, 9.641e-02, -2.914e-01, 3.878e-03, 1.175e-01, 5.219e-01, -1.944e-01, 8.239e-02, -9.744e-02, -3.373e-04, 1.771e-02, 2.086e-02, -8.618e-02, -1.587e-02, 7.036e-02, 8.380e-02) * s0_0_1;
	r2 += M4(-2.220e-01, -1.754e-02, 1.460e-01, -7.381e-03, 2.788e-02, -1.363e-01, 3.113e-03, 8.518e-02, 4.740e-02, -1.077e-01, -6.327e-02, 4.878e-02, 3.829e-02, 2.457e-02, 9.223e-03, -3.270e-02) * s0_0_1;
	r0 += M4(-4.661e-02, 1.448e-02, 2.929e-03, 1.968e-02, -8.051e-02, 6.233e-02, -5.128e-03, 1.250e-02, 8.560e-04, -1.679e-02, 4.771e-02, -5.188e-02, -6.773e-02, -8.725e-02, -1.301e-02, -8.561e-02) * s0_0_2;
	r1 += M4(-4.956e-02, 1.662e-02, -1.500e-01, 1.386e-02, 9.813e-02, 8.688e-02, -3.456e-01, 2.582e-02, -1.933e-01, 1.798e-02, -1.816e-01, 5.302e-02, -9.770e-02, 8.956e-02, -5.799e-02, 2.479e-02) * s0_0_2;
	r2 += M4(-3.584e-02, -2.902e-02, -3.247e-02, 3.167e-02, -1.326e-02, -1.127e-02, -1.017e-01, 3.009e-02, -2.528e-02, -4.737e-02, -6.454e-02, 8.633e-04, 8.455e-02, 2.558e-03, -8.010e-02, 8.884e-02) * s0_0_2;
	r0 += M4(-1.367e-02, 1.420e-01, 3.259e-02, -6.670e-02, -9.501e-02, 1.131e-02, 7.447e-02, -7.285e-02, 4.070e-02, 5.946e-02, 9.754e-03, -5.934e-02, 3.089e-02, -5.865e-02, -7.238e-02, -2.590e-02) * s0_1_0;
	r1 += M4(-1.267e-01, 1.907e-02, 1.741e-02, -6.499e-02, 3.820e-02, 4.994e-02, 5.865e-02, 7.190e-02, 6.067e-02, 6.185e-03, 2.980e-02, -3.753e-02, 2.697e-02, 2.848e-02, 5.175e-02, 8.277e-03) * s0_1_0;
	r2 += M4(1.630e-01, 5.326e-01, 2.118e-02, 7.922e-02, 1.213e-01, -6.247e-02, 1.945e-02, 1.264e-01, 8.936e-02, 5.994e-02, -4.522e-02, -3.582e-02, 3.576e-02, -5.725e-02, -1.599e-01, 1.920e-02) * s0_1_0;
	r0 += M4(-1.307e-02, -4.776e-02, 2.529e-01, -1.818e-01, -2.412e-02, -1.472e-01, -2.234e-01, 4.409e-02, 1.189e-01, -2.325e-02, -7.607e-02, -2.059e-01, 1.668e-02, 3.147e-02, 6.883e-02, 1.893e-01) * s0_1_1;
	r1 += M4(-3.460e-01, -2.682e-01, 2.608e-01, -4.643e-01, 3.804e-01, -3.916e-01, -1.427e-01, -1.712e-01, 2.795e-01, -1.616e-01, 4.259e-02, -9.201e-02, 3.808e-01, 1.860e-01, 2.223e-01, -5.844e-02) * s0_1_1;
	r2 += M4(1.440e-01, -5.870e-02, -1.979e-01, 2.114e-01, -1.992e-01, -4.895e-02, -1.799e-01, -1.732e-01, -5.351e-02, 4.941e-02, -3.447e-01, 8.185e-02, -1.997e-02, -1.101e-01, -1.274e-01, 5.177e-01) * s0_1_1;
	r0 += M4(1.290e-02, -3.842e-02, 4.169e-02, -3.384e-03, -6.144e-02, 2.211e-01, 2.143e-02, -4.914e-02, 5.655e-01, 3.514e-01, 4.232e-01, -1.249e-02, 1.135e-01, 5.727e-03, -1.686e-02, -4.366e-02) * s0_1_2;
	r1 += M4(-1.160e-02, -4.114e-03, -1.970e-01, -2.936e-02, 2.161e-01, -1.280e-02, 1.773e-02, 1.799e-01, 1.193e-01, -2.060e-01, -6.011e-01, 1.739e-01, 1.844e-01, -8.021e-02, -2.634e-01, 2.417e-01) * s0_1_2;
	r2 += M4(4.348e-02, 1.207e-02, 3.307e-03, -7.921e-03, -2.014e-02, -6.575e-02, -7.285e-02, -6.219e-03, -2.284e-02, 2.896e-02, 1.023e-01, -1.418e-01, -7.967e-02, -2.454e-02, -6.293e-02, 3.158e-02) * s0_1_2;
	r0 += M4(-5.832e-03, 1.453e-02, -1.208e-02, -1.465e-01, -1.762e-02, -9.762e-03, 1.442e-02, 4.662e-02, 3.877e-02, -3.630e-02, -2.964e-02, 2.154e-02, 6.738e-03, -4.927e-02, -3.253e-02, -3.390e-02) * s0_2_0;
	r1 += M4(4.087e-02, 2.195e-02, 6.035e-02, -1.256e-01, 5.802e-02, -2.671e-02, -2.816e-03, 1.802e-02, 8.707e-02, -1.256e-02, 2.392e-02, -3.304e-02, -7.165e-02, -4.580e-03, -1.450e-02, 5.391e-03) * s0_2_0;
	r2 += M4(-2.413e-01, 5.610e-02, -8.801e-02, 1.299e-01, -4.636e-02, -2.874e-02, -1.353e-02, -1.303e-02, -9.599e-02, -4.757e-02, -4.058e-02, -2.959e-02, 6.469e-03, 3.574e-02, -4.556e-02, 2.728e-02) * s0_2_0;
	r0 += M4(-1.172e-01, 1.362e-01, -1.043e-01, -9.531e-02, 2.972e-02, 5.428e-03, -1.178e-02, -1.662e-02, 8.268e-02, 1.480e-01, -1.925e-02, 4.177e-02, 1.910e-02, 8.968e-02, -3.002e-02, 5.330e-02) * s0_2_1;
	r1 += M4(1.912e-02, -1.164e-02, 1.106e-02, -4.422e-01, 2.972e-02, 1.357e-02, 9.441e-02, 1.432e-02, -1.325e-01, -2.616e-02, 4.320e-02, -3.870e-02, -1.331e-01, 2.200e-02, 1.048e-01, -7.022e-02) * s0_2_1;
	r2 += M4(-2.231e-01, 2.812e-02, -7.075e-02, -2.520e-02, -5.358e-03, 1.315e-02, -1.195e-02, -5.372e-02, -3.178e-03, 2.412e-02, 2.157e-02, 3.386e-02, 1.789e-01, -4.740e-02, -3.714e-02, 1.101e-01) * s0_2_1;
	r0 += M4(4.061e-02, -5.316e-03, 1.760e-02, -2.956e-03, -3.061e-02, 6.757e-02, -8.854e-04, -3.843e-03, 7.183e-02, -2.722e-02, 1.362e-01, 8.704e-02, 3.945e-02, -2.176e-01, 2.134e-04, 2.879e-02) * s0_2_2;
	r1 += M4(1.515e-02, -6.983e-02, -7.283e-03, -1.343e-01, 4.746e-02, -2.440e-02, -3.903e-02, 2.310e-02, -7.427e-02, -3.675e-02, 1.086e-01, -3.519e-02, -4.367e-02, 6.322e-02, 5.081e-02, 9.953e-02) * s0_2_2;
	r2 += M4(-4.359e-02, 1.256e-02, -1.711e-02, -2.475e-02, 3.713e-02, -2.268e-02, -8.517e-03, -2.620e-02, 5.395e-02, -1.973e-02, -2.694e-02, 2.395e-02, 1.437e-01, 2.397e-02, 9.882e-02, 1.095e-01) * s0_2_2;
	// Plane 1 taps.
	r0 += M4(-2.299e-03, -7.805e-03, 6.237e-02, 4.387e-02, -9.525e-03, -7.713e-02, -7.533e-02, -1.928e-02, -1.237e-02, -3.051e-02, -1.271e-02, 2.708e-02, -3.123e-02, -1.265e-01, -5.580e-02, -1.021e-02) * s1_0_0;
	r1 += M4(-1.135e-02, 2.930e-02, -3.602e-02, 1.034e-02, -3.247e-02, 2.550e-02, -9.816e-02, -1.210e-02, 1.036e-02, -2.026e-02, 6.117e-03, 3.611e-03, -1.019e-01, -5.913e-02, -1.000e-02, -3.283e-02) * s1_0_0;
	r2 += M4(6.781e-04, 1.004e-02, 3.787e-02, -1.343e-03, -1.869e-02, -5.374e-02, 5.461e-02, -3.601e-02, -1.511e-02, 4.790e-03, 2.370e-02, 7.098e-02, -2.090e-02, -4.143e-02, -3.757e-02, 4.113e-03) * s1_0_0;
	r0 += M4(-4.644e-03, 7.737e-03, -7.593e-02, 7.436e-02, -6.920e-02, -7.006e-02, -7.161e-02, 3.169e-02, -7.776e-02, -2.009e-02, -2.568e-02, 1.897e-02, -1.185e-01, -9.778e-02, 1.225e-01, -2.422e-02) * s1_0_1;
	r1 += M4(1.475e-01, 1.218e-01, -1.649e-01, -4.408e-02, 2.059e-02, -3.165e-02, -7.868e-02, 5.041e-02, 6.639e-02, -2.241e-02, 6.310e-02, 3.047e-02, -1.310e-01, -2.757e-02, 3.826e-01, 1.084e-02) * s1_0_1;
	r2 += M4(-1.486e-01, -4.952e-02, 3.747e-02, 1.128e-01, -8.440e-02, -1.280e-01, 3.360e-02, -6.330e-02, -8.171e-02, 2.895e-02, 5.674e-02, 1.396e-02, 6.491e-02, 3.053e-02, -1.120e-01, -1.039e-01) * s1_0_1;
	r0 += M4(2.255e-02, 2.949e-03, 3.340e-02, 3.456e-02, 2.781e-02, 1.987e-02, -5.678e-02, -5.115e-03, 6.586e-02, 4.446e-02, -1.627e-02, 1.537e-02, 5.620e-02, -1.222e-01, -5.240e-02, -4.162e-02) * s1_0_2;
	r1 += M4(3.071e-02, -1.701e-02, -9.353e-02, 6.649e-02, -7.143e-02, 1.816e-02, 1.156e-01, -2.945e-02, -4.277e-03, -1.655e-03, 2.177e-01, 3.013e-02, -7.008e-02, -2.934e-02, -2.330e-01, -1.534e-02) * s1_0_2;
	r2 += M4(-9.598e-02, -2.094e-02, -3.798e-02, 1.280e-02, 4.117e-02, 3.577e-02, 9.622e-02, -1.205e-01, -2.588e-02, 4.394e-02, 9.790e-02, -5.997e-02, -6.097e-02, -5.511e-03, 8.652e-02, 3.433e-02) * s1_0_2;
	r0 += M4(-1.137e-01, 6.614e-02, 3.334e-02, -1.734e-02, 3.533e-02, -9.781e-02, -5.929e-02, 8.390e-03, -3.851e-02, 6.131e-03, -1.501e-02, -4.226e-02, -7.948e-03, -8.841e-02, -3.702e-02, 7.119e-02) * s1_1_0;
	r1 += M4(-1.880e-01, 6.981e-02, 2.831e-02, -3.805e-02, -1.862e-01, -1.373e-01, 5.164e-02, 5.093e-02, -1.764e-01, 2.229e-02, -1.369e-02, 1.811e-02, 1.458e-01, -1.040e-02, -3.891e-02, 9.709e-02) * s1_1_0;
	r2 += M4(6.470e-02, 1.515e-02, 1.248e-01, 1.660e-01, -1.230e-01, -5.419e-02, 5.633e-02, -1.487e-01, -9.217e-02, -5.075e-02, 1.007e-01, 7.413e-02, -1.801e-01, -3.430e-02, -1.238e-01, -1.860e-01) * s1_1_0;
	r0 += M4(2.106e-01, -1.473e-01, 7.241e-02, -3.681e-02, -2.788e-01, 3.919e-02, -4.342e-02, 1.240e-02, -3.655e-01, -3.253e-01, -1.180e-01, -1.007e-01, -2.273e-01, -1.240e-01, -1.977e-01, -1.912e-02) * s1_1_1;
	r1 += M4(-3.157e-01, -7.394e-02, -2.841e-01, -2.219e-01, -3.726e-01, 6.771e-02, 1.100e-01, -2.700e-01, -6.033e-01, -1.909e-01, -2.580e-01, -7.404e-02, 1.636e-01, 1.680e-01, 1.037e-01, 2.870e-01) * s1_1_1;
	r2 += M4(1.627e-01, 6.960e-02, 6.898e-02, -3.094e-01, -1.453e-01, -2.870e-02, 9.897e-02, -6.305e-02, -3.948e-01, -8.310e-02, 7.715e-01, -4.149e-01, -1.689e-01, -2.645e-02, 6.678e-02, 9.855e-02) * s1_1_1;
	r0 += M4(-2.401e-01, -1.258e-01, 1.095e-01, 1.211e-01, -5.401e-02, -5.367e-02, -1.484e-01, -8.477e-02, 1.479e-01, -1.072e-01, 1.782e-02, 8.053e-03, 1.085e-01, -2.627e-01, 1.160e-01, -3.594e-02) * s1_1_2;
	r1 += M4(-1.037e-01, 9.624e-02, 2.540e-01, 2.995e-03, 2.123e-02, -1.742e-01, -3.251e-01, -3.857e-02, -7.315e-02, -1.932e-02, -1.431e-01, -2.176e-02, -9.074e-02, -4.172e-02, 1.475e-02, -1.043e-01) * s1_1_2;
	r2 += M4(5.933e-02, -3.982e-02, -1.417e-01, 1.430e-01, -6.338e-02, -3.325e-02, 2.095e-01, -3.018e-02, -1.002e-01, 5.122e-03, 1.799e-02, -2.471e-01, -7.795e-02, 4.738e-02, 7.730e-02, -1.636e-01) * s1_1_2;
	r0 += M4(-2.354e-02, 4.261e-02, 5.555e-02, -2.703e-02, -7.063e-02, -3.238e-02, -3.920e-02, -6.436e-02, 2.644e-02, 8.064e-02, 1.679e-02, 1.543e-02, -1.962e-02, -9.703e-02, 3.837e-02, -4.627e-02) * s1_2_0;
	r1 += M4(-1.483e-02, -1.184e-02, 5.466e-02, 4.655e-02, -1.111e-01, -5.410e-02, -1.225e-01, 1.148e-01, -6.372e-02, 2.689e-02, -1.830e-02, 9.236e-02, -2.941e-01, 4.033e-02, -2.484e-02, -1.073e-01) * s1_2_0;
	r2 += M4(-8.040e-02, 8.554e-03, -3.070e-02, 7.273e-02, -4.013e-02, 5.972e-03, 9.358e-02, -5.239e-02, 1.218e-01, -7.936e-03, -1.117e-02, -3.009e-02, 1.067e-01, 5.335e-02, 1.025e-01, 3.839e-02) * s1_2_0;
	r0 += M4(-1.560e-03, -1.724e-01, -1.331e-02, 1.849e-01, -8.001e-02, -1.093e-02, -8.303e-02, -6.138e-02, -3.981e-02, -1.179e-01, 9.425e-04, 6.821e-02, 7.249e-03, -1.014e-01, 4.505e-02, 7.239e-02) * s1_2_1;
	r1 += M4(7.209e-02, -8.964e-02, -1.037e-02, -2.427e-01, 4.557e-02, 1.793e-03, -1.086e-01, -8.023e-02, -1.870e-01, 1.185e-02, -1.775e-02, 4.664e-02, -1.792e-01, 2.268e-02, 1.052e-02, -2.712e-01) * s1_2_1;
	r2 += M4(-1.500e-01, -2.668e-02, -1.515e-01, -9.941e-02, -1.446e-01, -1.161e-01, 4.502e-02, -1.173e-01, 1.306e-01, 2.782e-02, 1.306e-03, -1.405e-01, 6.945e-02, -4.015e-02, 9.301e-02, -9.453e-02) * s1_2_1;
	r0 += M4(-3.773e-02, 3.113e-02, -2.636e-02, -1.968e-01, 3.092e-02, -2.359e-01, -3.762e-02, -6.756e-02, -1.890e-03, -2.175e-01, 1.032e-02, -2.284e-02, -6.987e-02, -2.059e-01, 7.343e-03, 5.384e-02) * s1_2_2;
	r1 += M4(1.765e-02, -4.505e-02, -5.097e-02, -2.447e-02, -6.150e-02, 2.564e-02, -3.756e-02, 4.068e-02, -1.799e-01, 9.182e-02, -2.579e-02, 1.954e-02, -1.803e-02, 7.359e-02, 3.257e-02, 7.197e-02) * s1_2_2;
	r2 += M4(-1.393e-01, -2.853e-02, 6.723e-03, 7.591e-03, -5.093e-02, 1.851e-02, 1.225e-01, -4.597e-02, 3.487e-02, 1.978e-02, 2.083e-02, 3.006e-02, -4.444e-02, 2.497e-02, -3.925e-02, 1.259e-01) * s1_2_2;
	// The s0_* variables are reused here: from this point on they hold the
	// 3x3 neighbourhood of plane 2, not plane 0.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	// Plane 2 taps.
	r0 += M4(4.363e-03, 1.070e-02, 8.275e-03, -3.989e-02, 5.746e-03, -9.167e-03, -6.561e-03, -5.444e-03, -5.529e-03, 2.381e-02, -1.825e-02, -1.794e-02, 2.106e-02, 9.064e-02, 6.717e-02, 6.303e-02) * s0_0_0;
	r1 += M4(-8.863e-03, -2.851e-02, -1.044e-01, -2.468e-02, -3.799e-03, -2.334e-03, -1.471e-02, 2.630e-02, 3.382e-02, 3.278e-02, 8.104e-03, 1.006e-02, 1.337e-01, 8.820e-02, 5.405e-02, 6.367e-02) * s0_0_0;
	r2 += M4(4.908e-03, -1.031e-02, 6.134e-02, 2.790e-02, -1.657e-02, -4.025e-02, 5.224e-02, 3.934e-03, -9.621e-03, -6.721e-04, 9.393e-03, -4.032e-02, 3.325e-02, 7.986e-02, 3.711e-02, 7.829e-02) * s0_0_0;
	r0 += M4(1.109e-01, 1.717e-02, 2.213e-02, -5.149e-02, -3.920e-02, -7.936e-03, -7.160e-02, 1.417e-02, 3.535e-02, 3.321e-02, -4.058e-02, 4.530e-03, 7.090e-02, -1.944e-02, 5.986e-02, 7.972e-02) * s0_0_1;
	r1 += M4(4.103e-02, -3.250e-02, 2.377e-02, 4.820e-02, -1.291e-01, -8.497e-02, -1.342e-01, -3.797e-02, -2.938e-02, -2.061e-02, -1.086e-01, 3.193e-04, 1.057e-01, 1.558e-01, 7.366e-02, 3.782e-02) * s0_0_1;
	r2 += M4(5.898e-02, -1.164e-02, 8.460e-02, -1.804e-03, 3.249e-02, -6.326e-02, 4.287e-02, 1.362e-02, 2.235e-02, -3.595e-02, -4.163e-02, -5.563e-03, -4.503e-02, -1.358e-02, 2.536e-02, 4.387e-02) * s0_0_1;
	r0 += M4(2.549e-02, -1.262e-03, -3.543e-02, -6.437e-02, -4.160e-02, 1.381e-02, 1.788e-02, -9.960e-03, -3.703e-02, -8.290e-03, -2.784e-03, -2.257e-02, -5.424e-02, 3.179e-02, 3.184e-02, -9.191e-03) * s0_0_2;
	r1 += M4(2.136e-02, -4.363e-02, -4.894e-02, 4.381e-02, 6.756e-02, -9.050e-02, 1.170e-01, -1.259e-02, -4.356e-02, 7.749e-03, 3.573e-02, -2.497e-02, 9.388e-02, 8.871e-02, -1.562e-02, 6.649e-02) * s0_0_2;
	r2 += M4(4.361e-02, 2.057e-02, 2.321e-02, 1.138e-03, 5.272e-03, 7.501e-03, 3.656e-02, -1.694e-02, 6.720e-02, -2.248e-02, -4.619e-03, 2.367e-02, -5.125e-02, -3.192e-02, -7.998e-04, 7.299e-02) * s0_0_2;
	r0 += M4(6.043e-02, 4.324e-02, -4.110e-02, -7.235e-03, 4.241e-02, 8.523e-03, 5.346e-02, -1.047e-01, -3.745e-03, 3.187e-02, -5.787e-02, -4.156e-02, -4.838e-02, 8.395e-03, 9.941e-02, 3.579e-03) * s0_1_0;
	r1 += M4(2.376e-02, 2.489e-03, -7.318e-02, -1.878e-02, 6.112e-02, -1.640e-02, 8.282e-02, -7.564e-04, 9.056e-02, -2.495e-02, -3.157e-02, 1.176e-02, 4.767e-02, 5.591e-02, 8.993e-02, 8.450e-02) * s0_1_0;
	r2 += M4(7.252e-02, 7.997e-02, -2.455e-03, -6.826e-02, 6.453e-02, -2.486e-02, 4.017e-02, 1.883e-02, 1.150e-01, 1.676e-01, -8.189e-02, -2.030e-01, 1.377e-01, 3.076e-02, 5.607e-02, 1.242e-01) * s0_1_0;
	r0 += M4(-1.322e-01, 4.285e-02, 2.514e-01, 2.017e-01, -2.512e-01, -2.598e-01, -5.840e-02, -3.308e-01, -4.088e-02, -6.470e-02, 1.294e-01, -1.436e-01, 9.125e-02, 5.198e-02, 8.109e-02, 1.241e-02) * s0_1_1;
	r1 += M4(-2.823e-01, -1.894e-02, 6.251e-01, 1.165e-01, -2.865e-01, -3.896e-02, 6.517e-02, 8.589e-02, -7.691e-02, -1.231e-01, 1.138e-01, -1.200e-01, 3.774e-02, -2.271e-03, 1.700e-01, -1.492e-02) * s0_1_1;
	r2 += M4(-1.560e-01, -5.656e-02, -8.361e-03, -1.256e-01, -3.055e-01, -2.563e-03, -6.304e-02, -3.382e-01, 1.170e-01, -2.399e-01, -1.692e-01, 1.250e-01, 9.685e-02, -3.707e-02, 7.598e-03, -4.502e-02) * s0_1_1;
	r0 += M4(-1.841e-02, 1.067e-01, -4.421e-02, 7.892e-02, -1.083e-01, 1.839e-02, -1.107e-01, -1.047e-01, 1.636e-02, -6.280e-03, -1.357e-02, -3.408e-02, -2.681e-01, -6.168e-02, -4.589e-03, -3.553e-02) * s0_1_2;
	r1 += M4(-1.253e-01, -1.086e-01, 8.171e-02, 1.564e-01, -1.086e-01, -3.703e-02, 1.586e-01, 3.312e-02, 2.862e-02, -1.008e-01, -4.048e-01, 6.811e-02, -9.809e-02, -2.291e-02, 1.224e-01, 7.336e-02) * s0_1_2;
	r2 += M4(1.778e-02, 5.547e-02, 1.821e-01, -1.411e-01, -1.407e-01, 2.180e-02, -5.875e-02, 1.048e-02, 7.627e-02, 2.844e-03, -6.563e-03, -8.640e-02, 1.552e-01, -1.688e-02, -4.535e-02, 3.419e-02) * s0_1_2;
	r0 += M4(2.668e-02, -8.701e-02, 1.526e-02, 4.481e-02, 5.118e-03, 1.070e-02, 4.921e-02, -2.916e-02, 1.582e-02, -2.058e-02, -5.750e-02, 1.877e-01, -4.992e-03, 6.471e-02, 6.176e-02, -1.483e-03) * s0_2_0;
	r1 += M4(1.650e-03, -1.312e-02, 4.189e-02, 4.885e-03, 1.968e-03, 3.255e-03, -2.437e-02, 1.361e-02, 6.864e-02, -9.367e-02, 2.181e-02, 2.440e-02, 1.479e-03, -1.053e-02, 7.094e-02, -4.757e-02) * s0_2_0;
	r2 += M4(4.488e-02, -8.368e-02, 3.589e-02, -4.242e-04, -5.510e-02, -5.736e-02, 1.104e-01, 3.326e-02, -9.306e-02, -1.155e-01, -8.978e-02, -1.997e-01, -6.045e-02, 3.146e-02, -9.706e-03, 9.869e-02) * s0_2_0;
	r0 += M4(-3.102e-02, 9.394e-02, -2.439e-02, -1.549e-01, 6.827e-03, -3.416e-02, -8.218e-02, -5.480e-02, 1.743e-02, 2.637e-01, 8.097e-02, 4.213e-01, 3.833e-02, -7.055e-02, 2.034e-02, 3.554e-02) * s0_2_1;
	r1 += M4(1.115e-02, 4.920e-02, 4.934e-02, 3.186e-01, 1.015e-01, 1.705e-02, -1.545e-02, -3.467e-02, -3.842e-02, 1.022e-01, 2.165e-01, -8.424e-01, 4.610e-02, -8.710e-03, 5.541e-02, 1.682e-02) * s0_2_1;
	r2 += M4(-2.459e-01, -4.852e-02, 1.155e-01, -9.786e-02, 5.490e-02, -5.629e-02, 5.986e-02, 1.002e-01, 6.189e-01, 4.104e-03, 1.383e-01, 1.702e-01, -2.210e-01, 4.219e-03, -2.318e-02, -4.034e-02) * s0_2_1;
	r0 += M4(-6.400e-02, -3.331e-02, -5.174e-02, 1.057e-02, -4.779e-02, 3.650e-02, -4.637e-02, -1.072e-01, -6.451e-02, -9.162e-03, -8.632e-02, 2.205e-01, -2.632e-01, -4.010e-02, 4.408e-02, -2.477e-02) * s0_2_2;
	r1 += M4(-1.116e-01, 4.935e-02, -4.831e-03, 1.340e-01, -7.870e-02, 5.291e-02, -4.361e-02, 1.627e-01, 4.915e-02, -9.853e-02, 1.366e-02, -4.452e-02, -9.293e-04, -3.672e-02, 2.844e-02, -2.637e-02) * s0_2_2;
	r2 += M4(-1.152e-01, 2.557e-02, 3.495e-02, 2.377e-02, 9.690e-02, -1.570e-02, 7.498e-02, 3.536e-02, 1.418e-01, -1.765e-02, -3.519e-02, -8.556e-02, -3.162e-01, -1.893e-02, -1.104e-01, 5.009e-02) * s0_2_2;
	// Clamp each accumulator to [0, 1] and write it to its own texel:
	// r0, r1, r2 go to x offsets 0, 1, 2 from opos.
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_4x12_SOFT] -conv4
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND conv3
//!BIND LUMA
//!SAVE conv4
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Numeric types for this pass: half precision when the explicit float16
// extension is available, otherwise the regular 32-bit types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): fetch one texel of the bound conv3 texture as a V4, scaled
// by conv3_mul. The coordinate pos + (x, y) is clamped to [0, sz], so taps
// outside that range reuse the nearest edge position. The clamped x is then
// multiplied by 3 and offset by 0, 1 or 2, i.e. each position owns three
// horizontally adjacent texels and lN selects the N-th of them.
// The macros expand in place and rely on `pos` and `sz` being in scope.
#define l0(x, y) V4((conv3_mul * texelFetch(conv3_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv3_mul * texelFetch(conv3_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv3_mul * texelFetch(conv3_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared input tile: 3 planes (one per lN macro) of 10x10 V4
// entries, indexed G[plane][row][column]. hook() fills it with offsets
// starting at (-1, -1) and reads rows/columns +0..+2 per invocation.
shared V4 G[3][10][10];
// Entry point of the conv4 pass.
// Each invocation handles one position `pos`: it reads a 3x3 neighbourhood
// from each of the three planes of the shared tile G, accumulates
// M4 * V4 products into r0, r1 and r2, clamps them to [0, 1] and writes them
// to three horizontally adjacent texels of out_image.
// The M4 constants are presumably trained network weights emitted by a
// generator (not verifiable from this file) -- do not edit them by hand.
// The statement order of the accumulation is significant for the rounded
// result, especially with the half-precision types, so keep it as-is.
void hook() {
	// xy: index of this invocation inside its work group.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// pos: work-group origin (8 positions per group in each axis) plus xy.
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// opos: first of the three output texels written by this invocation.
	ivec2 opos = pos * ivec2(3, 1);
	// sz: LUMA_size - 1, the upper clamp bound used by l0/l1/l2.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the shared tile. Each invocation visits tile indices
	// (xy.y + {0, 8}, xy.x + {0, 8}) and skips those that are >= 10.
	// Tile index (ay, ax) holds the sample for pos + (x - 1, y - 1), i.e. the
	// tile is shifted by one position up/left relative to the work group.
	// NOTE(review): full 10x10 coverage assumes 8x8 invocations per group, as
	// the last two numbers of the COMPUTE directive suggest -- confirm.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// Synchronise the work group before reading entries of G that were
	// written by other invocations.
	barrier();
	// sP_R_C: sample of plane P at tile row xy.y + R, column xy.x + C.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	// Accumulators for the three output texels.
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	// Load the 3x3 neighbourhoods of planes 0 (s0_*) and 1 (s1_*).
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 and plane 1 contributions: one M4 per (sample, accumulator) pair.
	r0 += M4(1.512e-02, 3.133e-02, -5.927e-03, -5.296e-03, -4.818e-02, 3.437e-03, -2.862e-02, 7.841e-03, -3.286e-02, 3.315e-02, 3.440e-02, 1.021e-02, -5.321e-02, -1.819e-02, -4.830e-03, -3.886e-03) * s0_0_0;
	r1 += M4(-1.152e-02, -3.365e-02, 2.920e-02, 8.943e-04, 2.977e-02, 2.820e-02, -6.852e-02, -6.363e-02, 2.246e-02, -1.871e-03, -3.688e-02, -3.439e-02, -5.872e-03, 1.965e-02, -1.072e-02, 4.379e-02) * s0_0_0;
	r2 += M4(3.671e-02, -2.258e-02, 1.616e-01, 2.084e-01, -1.115e-01, 3.096e-02, -1.919e-01, -2.244e-01, 9.818e-03, -3.826e-02, 2.581e-02, 5.808e-02, 2.290e-02, 1.130e-02, 1.748e-01, 3.101e-01) * s0_0_0;
	r0 += M4(-3.958e-02, 5.259e-02, 6.149e-03, -1.149e-03, 2.447e-01, 2.876e-02, -2.209e-02, 1.260e-02, -3.654e-02, 1.617e-01, 1.657e-03, 3.479e-04, -2.660e-03, -2.198e-01, 2.173e-03, -6.710e-03) * s0_0_1;
	r1 += M4(6.319e-02, 2.807e-02, 3.011e-02, -5.400e-01, 2.998e-02, -4.682e-02, -4.679e-02, 1.556e-01, -1.253e-02, -2.144e-02, 1.522e-02, 2.408e-01, 5.717e-01, -2.900e-01, 6.090e-03, 1.009e-01) * s0_0_1;
	r2 += M4(-2.159e-02, -1.857e-02, -9.153e-02, -9.284e-03, -4.898e-03, -3.451e-03, 6.438e-02, 9.352e-02, 4.484e-02, 2.176e-02, 3.248e-02, 7.446e-02, -1.838e-02, 4.687e-02, -6.168e-01, -1.238e-01) * s0_0_1;
	r0 += M4(-8.009e-03, -9.850e-02, -6.543e-03, 2.297e-03, -1.008e-01, 1.096e-01, 2.181e-03, -8.994e-03, 1.344e-02, 9.149e-02, 7.448e-03, 2.912e-03, -2.589e-02, -2.210e-02, -1.477e-02, 7.758e-04) * s0_0_2;
	r1 += M4(4.080e-02, 2.676e-02, 3.316e-02, 2.692e-01, 2.296e-02, 1.233e-02, 1.382e-01, -1.707e-01, -7.560e-03, -2.574e-03, 2.977e-02, -1.528e-01, 3.948e-02, 3.217e-02, -1.062e-01, -4.314e-01) * s0_0_2;
	r2 += M4(1.417e-02, -4.602e-03, 3.849e-03, 7.553e-03, -5.403e-03, -1.741e-04, -8.498e-03, 1.394e-03, -3.994e-02, -3.250e-03, -2.562e-03, -3.110e-03, -3.404e-02, -6.000e-03, 2.425e-02, 4.038e-02) * s0_0_2;
	r0 += M4(-1.496e-03, 9.158e-02, 3.080e-03, 2.418e-03, 2.485e-01, -2.686e-01, 1.999e-01, 4.994e-03, 1.313e-01, -2.738e-01, 1.289e-02, 3.895e-04, -4.806e-02, 3.584e-02, 4.560e-01, -4.229e-03) * s0_1_0;
	r1 += M4(-1.452e-02, -8.323e-02, 2.195e-02, -3.226e-02, 5.450e-02, 5.686e-02, -3.503e-02, -7.149e-03, -2.136e-02, -5.072e-01, 2.145e-02, 1.325e-01, 2.551e-02, -2.152e-02, 1.964e-02, -1.500e-04) * s0_1_0;
	r2 += M4(-1.717e-02, 2.740e-01, 1.801e-01, 3.855e-01, 4.592e-02, -3.799e-01, 5.550e-02, -1.816e-02, 3.115e-02, 1.532e-01, -5.641e-02, -1.208e-01, -5.576e-02, -1.132e-02, 4.443e-02, 9.736e-02) * s0_1_0;
	r0 += M4(-2.314e-01, 2.698e-01, 3.960e-02, 5.977e-02, 5.198e-01, 1.317e-01, -9.738e-02, 1.460e-02, 2.006e-01, 3.290e-01, 9.351e-03, 7.423e-02, -3.388e-01, -1.530e-02, 1.388e-01, 4.273e-01) * s0_1_1;
	r1 += M4(4.287e-01, 4.098e-01, -2.776e-01, 4.453e-02, -2.900e-02, 2.666e-02, 6.792e-01, -3.577e-03, 1.147e-01, 2.186e-01, 6.750e-02, 2.666e-01, -7.270e-02, 4.164e-02, 2.731e-02, 5.100e-02) * s0_1_1;
	r2 += M4(-5.140e-02, -2.706e-03, 1.439e-01, -6.555e-02, 2.586e-01, 8.319e-02, 3.469e-02, 2.191e-03, -1.575e-01, 2.067e-03, -7.170e-02, 3.673e-02, -2.361e-01, -1.042e-01, 9.144e-02, 8.645e-02) * s0_1_1;
	r0 += M4(1.280e-01, -8.530e-02, 9.726e-04, 1.753e-02, -1.857e-01, 1.763e-01, 6.633e-03, -5.655e-03, -1.541e-02, 5.152e-02, 3.305e-03, -1.064e-02, 8.974e-02, 2.696e-02, 3.942e-02, 5.080e-02) * s0_1_2;
	r1 += M4(-7.736e-02, -5.163e-02, 1.825e-01, 3.268e-02, -1.560e-02, 2.242e-02, -8.420e-02, -6.120e-02, 5.256e-02, 1.879e-02, -5.873e-02, -4.102e-02, 3.507e-02, -1.179e-02, -5.723e-01, 7.153e-02) * s0_1_2;
	r2 += M4(7.717e-02, 8.962e-04, 3.008e-02, 1.074e-02, -2.493e-01, -9.090e-03, 9.019e-03, 4.808e-03, -1.609e-02, 4.871e-03, -2.460e-02, -1.018e-02, 5.781e-02, -1.025e-02, 6.223e-04, 9.352e-03) * s0_1_2;
	r0 += M4(-1.707e-01, 2.948e-02, 1.176e-01, -5.562e-02, 5.431e-02, 1.157e-01, -7.895e-02, 3.466e-03, 2.794e-01, 3.286e-02, -2.554e-01, 7.300e-02, -5.064e-02, -5.559e-02, -2.602e-02, 5.282e-03) * s0_2_0;
	r1 += M4(-3.694e-02, -1.674e-02, -1.544e-03, 2.164e-02, -1.647e-02, -3.731e-02, -4.136e-02, -2.325e-02, 5.073e-02, -5.525e-02, -1.553e-02, -1.224e-02, 8.815e-03, 1.587e-02, 2.686e-02, 1.286e-02) * s0_2_0;
	r2 += M4(4.571e-02, 2.016e-01, 4.001e-02, 1.894e-02, -6.076e-02, 4.971e-02, -5.578e-03, -5.517e-03, -4.300e-02, -4.723e-01, -1.038e-02, 3.981e-02, 7.226e-03, 5.861e-02, -9.268e-03, -1.331e-02) * s0_2_0;
	r0 += M4(7.877e-02, -1.212e-02, 8.877e-02, 5.332e-01, 6.016e-02, 1.861e-02, 4.694e-02, -9.267e-02, -2.907e-01, 1.602e-01, -9.668e-02, -6.188e-01, 5.622e-02, -1.792e-01, -4.021e-02, 8.109e-02) * s0_2_1;
	r1 += M4(4.965e-02, 5.259e-02, -1.636e-01, -1.295e-02, -2.980e-02, -1.202e-02, -1.018e-01, 2.435e-03, -9.917e-03, 1.677e-02, 4.268e-01, 1.757e-02, 1.012e-02, -1.052e-02, 5.681e-02, -4.119e-03) * s0_2_1;
	r2 += M4(-2.127e-01, 2.948e-02, 4.087e-03, 1.007e-02, 5.362e-02, 9.139e-03, 7.674e-03, 3.130e-03, 9.472e-02, 2.480e-02, 1.760e-02, 3.446e-02, -6.148e-02, 2.318e-02, 5.397e-03, 4.965e-04) * s0_2_1;
	r0 += M4(4.383e-02, -1.042e-01, 4.354e-03, 2.865e-02, 9.771e-03, 2.895e-02, -7.572e-03, 4.272e-02, -8.825e-02, 1.580e-01, -1.125e-02, 2.725e-02, -4.179e-02, -3.845e-02, 4.993e-03, -2.693e-02) * s0_2_2;
	r1 += M4(2.589e-02, 4.808e-03, 2.211e-01, 1.894e-03, 7.731e-03, 1.912e-02, -2.940e-02, 5.413e-03, 4.080e-03, 4.979e-03, -1.513e-01, 3.043e-02, 2.509e-02, 3.833e-02, 1.255e-02, -3.757e-02) * s0_2_2;
	r2 += M4(-3.123e-02, 7.858e-03, 1.167e-02, 8.390e-03, 3.376e-02, -1.273e-02, 4.285e-03, 9.854e-03, -4.255e-02, -9.600e-03, -5.886e-03, -6.964e-03, 2.676e-02, 2.784e-04, 1.181e-02, 3.584e-03) * s0_2_2;
	r0 += M4(-1.470e-02, 1.143e-02, -1.093e-02, -3.809e-03, 4.370e-02, 2.851e-02, -1.559e-02, 4.163e-03, 3.984e-02, -3.349e-02, -1.335e-02, 1.361e-02, -1.466e-01, -1.344e-01, 1.665e-02, -1.172e-02) * s1_0_0;
	r1 += M4(-5.139e-02, -3.195e-02, 4.080e-03, 5.257e-02, 1.236e-02, -2.682e-03, 7.523e-04, -1.842e-02, -4.898e-03, -1.438e-02, -1.901e-02, -1.054e-02, 7.987e-03, -2.286e-02, 2.791e-02, -5.471e-02) * s1_0_0;
	r2 += M4(-3.054e-02, 1.716e-02, 1.294e-01, -1.146e-01, 1.274e-02, -1.569e-02, 5.513e-02, 6.951e-02, -2.326e-02, 2.926e-04, -1.618e-01, -6.561e-02, 6.143e-02, -7.956e-02, -1.339e-01, -2.156e-01) * s1_0_0;
	r0 += M4(1.485e-01, -3.329e-02, 2.382e-02, 2.043e-02, -8.292e-02, -1.192e-02, 5.234e-03, -5.227e-03, 2.419e-02, 1.215e-02, -2.718e-02, -3.578e-03, -5.108e-01, -4.526e-01, 5.555e-02, -1.989e-02) * s1_0_1;
	r1 += M4(5.496e-04, 4.463e-02, 1.072e-02, -2.326e-01, 7.149e-03, -9.829e-03, 3.357e-02, 1.180e-01, 4.138e-02, -1.726e-02, 8.637e-03, -2.363e-01, -9.747e-02, 2.844e-02, -3.536e-02, 1.556e-01) * s1_0_1;
	r2 += M4(-6.642e-03, 3.799e-02, -2.552e-01, -3.680e-01, -1.307e-02, -1.380e-02, 2.955e-04, 5.894e-02, 3.274e-02, -1.790e-03, -3.138e-02, -4.022e-02, -1.916e-01, -2.786e-02, -1.221e-01, -1.249e-01) * s1_0_1;
	r0 += M4(4.390e-02, 5.415e-02, -1.371e-02, 2.402e-03, 5.527e-02, -5.529e-02, -1.324e-02, 1.262e-03, 6.136e-02, 3.659e-02, -5.926e-03, 4.760e-03, 8.520e-02, -1.747e-01, 1.088e-02, -5.296e-03) * s1_0_2;
	r1 += M4(-6.738e-02, -2.860e-02, -2.071e-02, 1.744e-01, -2.770e-03, 7.737e-03, 1.020e-02, -6.258e-01, 1.312e-02, 9.161e-03, 1.331e-02, -1.586e-01, -8.384e-02, -3.984e-02, -1.626e-01, -1.513e-01) * s1_0_2;
	r2 += M4(6.106e-02, 3.827e-03, 8.007e-02, 4.646e-02, 5.026e-02, 1.387e-02, 1.580e-02, 5.346e-03, -2.023e-02, 7.966e-03, -4.096e-03, -3.153e-03, -3.146e-02, 9.136e-04, -3.071e-03, -1.693e-02) * s1_0_2;
	r0 += M4(1.670e-01, 4.396e-02, -1.447e-01, 1.536e-02, -1.622e-01, -2.064e-02, -3.702e-02, 5.328e-04, -2.778e-01, 1.075e-01, -3.652e-02, -1.617e-03, 7.991e-02, -1.126e-01, -6.643e-01, -3.338e-02) * s1_1_0;
	r1 += M4(3.228e-03, 7.871e-02, 7.934e-02, 2.844e-02, -6.661e-02, -6.846e-03, -1.062e-02, -6.442e-02, 6.394e-03, -1.014e-02, 1.848e-02, 3.367e-02, 4.734e-02, -1.711e-02, -6.901e-02, 1.877e-02) * s1_1_0;
	r2 += M4(-2.269e-03, 7.416e-02, 2.381e-03, 1.619e-01, 4.966e-02, -4.761e-02, 6.384e-02, -2.368e-01, -2.074e-01, -2.408e-01, -1.778e-01, -1.297e-01, 7.126e-02, -4.853e-01, 1.663e-02, -6.400e-02) * s1_1_0;
	r0 += M4(-2.451e-01, 3.643e-01, 7.074e-02, -2.880e-01, -7.200e-01, 8.561e-02, -6.708e-03, 3.598e-02, 7.421e-02, 1.574e-01, -5.770e-02, -1.341e-02, -5.317e-01, -2.047e-01, -5.233e-02, -4.011e-01) * s1_1_1;
	r1 += M4(-1.855e-01, -6.872e-03, -5.462e-02, 3.818e-02, 3.617e-02, -5.531e-02, -4.755e-02, -7.593e-02, -1.121e-01, -8.621e-02, -1.657e-01, -1.685e-01, -4.519e-02, 3.126e-02, 3.079e-01, 5.231e-02) * s1_1_1;
	r2 += M4(3.627e-01, -1.215e-03, 1.209e-01, 4.347e-01, -3.135e-01, 7.049e-02, 3.892e-01, 4.068e-02, 7.053e-02, 2.593e-02, -5.606e-02, -5.057e-02, -2.797e-01, -8.633e-02, 1.677e-03, -4.076e-02) * s1_1_1;
	r0 += M4(6.961e-02, 8.561e-02, 1.472e-02, 2.954e-02, -6.154e-03, -6.819e-02, 1.168e-03, -2.130e-02, -5.003e-02, 4.508e-02, 2.604e-02, -1.299e-02, 4.505e-02, -4.904e-01, -2.124e-02, -9.075e-02) * s1_1_2;
	r1 += M4(1.524e-01, 3.709e-02, 1.987e-01, -2.767e-03, -6.391e-02, -5.861e-02, -1.885e-01, 7.109e-02, -5.481e-02, -4.322e-02, -1.006e-02, -7.161e-02, -3.464e-02, -4.171e-02, -2.226e-01, 1.702e-01) * s1_1_2;
	r2 += M4(2.621e-01, 9.528e-03, 1.809e-02, 6.051e-02, 1.230e-01, -9.557e-03, 4.554e-02, 1.569e-02, -4.534e-02, -1.739e-02, -4.045e-03, -1.076e-02, 2.368e-01, 1.155e-02, -1.265e-02, 2.735e-03) * s1_1_2;
	r0 += M4(-1.128e-01, 3.086e-02, 1.401e-01, -2.353e-02, 5.208e-02, -7.452e-02, -1.463e-01, 2.283e-02, -1.219e-01, -3.045e-02, 7.683e-02, -2.489e-02, 5.896e-02, -2.397e-01, -2.920e-01, 6.006e-03) * s1_2_0;
	r1 += M4(1.770e-02, 3.065e-02, -1.327e-02, -5.421e-03, -9.215e-04, -7.245e-03, 1.195e-02, 8.154e-03, -2.920e-02, 9.180e-03, 5.086e-02, -2.143e-02, -9.784e-03, -1.047e-02, 3.070e-03, 2.394e-02) * s1_2_0;
	r2 += M4(-3.685e-02, 1.982e-02, -1.445e-02, 3.515e-03, 2.906e-02, 2.015e-01, 8.945e-04, 2.190e-02, -1.557e-02, -2.505e-01, -3.340e-03, 4.019e-03, 3.496e-02, 2.501e-02, -2.382e-03, -1.400e-03) * s1_2_0;
	r0 += M4(9.288e-03, 2.522e-01, -4.894e-02, 1.752e-01, -3.232e-01, -2.598e-01, 1.051e-01, -3.698e-01, -1.734e-02, 9.148e-02, 2.616e-02, -7.153e-01, -9.748e-02, -4.719e-01, 1.369e-02, -1.094e-01) * s1_2_1;
	r1 += M4(-1.306e-02, -9.708e-02, -1.365e-02, 1.807e-02, 7.475e-03, -2.782e-02, 1.315e-01, 2.575e-02, 5.209e-03, -2.032e-02, -2.896e-01, -5.884e-02, 1.580e-02, -1.879e-02, 8.110e-02, 4.685e-03) * s1_2_1;
	r2 += M4(-6.462e-01, 2.366e-02, 4.130e-03, -1.985e-02, 6.190e-01, 6.791e-02, -2.141e-02, -4.503e-02, -4.572e-02, 5.064e-03, 5.884e-03, 2.424e-02, 1.123e-01, 5.008e-03, 3.731e-03, -6.320e-03) * s1_2_1;
	r0 += M4(-9.541e-02, 1.731e-01, 1.087e-02, 3.291e-02, -2.005e-01, 2.618e-02, 2.059e-02, 2.393e-01, 1.579e-02, 5.696e-02, 6.706e-03, 2.252e-02, -8.970e-03, -2.939e-01, 2.596e-03, 5.310e-03) * s1_2_2;
	r1 += M4(1.319e-02, 2.663e-02, -2.311e-01, -3.134e-02, 4.422e-03, 2.296e-02, -9.158e-02, -2.548e-02, -1.091e-02, -1.152e-01, -9.005e-02, 1.111e-02, 1.092e-02, 1.354e-02, 1.206e-02, -2.841e-02) * s1_2_2;
	r2 += M4(-1.764e-01, 1.302e-02, -1.941e-02, -9.249e-03, -3.346e-01, 2.382e-02, 7.044e-03, 2.073e-03, -5.139e-02, -6.465e-03, -8.369e-03, -3.749e-03, -3.869e-02, -1.219e-03, 1.092e-02, 9.843e-03) * s1_2_2;
	// The s0_* variables are reused here: they now hold the 3x3 neighbourhood
	// of plane 2, and the following block adds that plane's contributions.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(1.016e-01, 3.140e-02, -1.649e-02, -1.198e-02, -2.196e-02, 1.002e-02, 3.516e-03, 2.712e-03, 4.611e-03, -2.670e-01, 4.313e-03, 3.427e-03, -3.714e-02, 9.393e-03, 3.532e-02, 4.129e-04) * s0_0_0;
	r1 += M4(-1.953e-02, 6.145e-03, 3.763e-02, -1.161e-02, -1.024e-02, -8.172e-04, 5.332e-03, 1.328e-02, 1.691e-02, 2.216e-04, -1.306e-02, -3.937e-02, -1.555e-02, -7.660e-03, -2.818e-03, -1.467e-02) * s0_0_0;
	r2 += M4(6.639e-02, -3.919e-02, 1.397e-01, 2.767e-01, 2.496e-02, 2.764e-02, 2.673e-02, 2.386e-02, -4.938e-02, -1.831e-02, 4.010e-03, 3.338e-02, 3.139e-02, 4.979e-02, -8.775e-02, -1.108e-01) * s0_0_0;
	r0 += M4(-1.675e-01, 3.343e-02, 8.067e-03, 1.596e-03, 1.324e-01, -3.792e-02, 1.482e-02, -1.536e-02, -1.208e-01, -5.391e-01, -1.226e-03, -6.539e-03, 1.650e-01, 9.004e-02, 4.296e-03, 3.458e-03) * s0_0_1;
	r1 += M4(1.392e-01, -1.617e-02, 1.971e-02, 2.371e-01, 6.474e-02, -1.491e-02, -1.520e-02, -4.114e-02, -7.956e-03, 1.821e-02, 1.737e-03, -8.440e-02, 7.199e-02, -8.024e-03, -2.531e-02, 2.203e-02) * s0_0_1;
	r2 += M4(-7.188e-04, -7.575e-03, 5.528e-01, 5.373e-01, 2.795e-02, 4.899e-03, -3.214e-02, 1.367e-01, 8.300e-02, 8.193e-03, 2.019e-02, -8.178e-03, 2.256e-02, 3.455e-02, -1.499e-02, -1.534e-02) * s0_0_1;
	r0 += M4(-1.996e-02, 3.681e-02, 2.570e-02, 8.208e-03, 6.067e-02, -1.044e-01, -2.413e-02, 1.190e-02, 7.029e-02, -1.614e-01, 1.037e-03, 1.341e-02, -5.253e-02, 7.538e-02, 9.204e-04, 3.866e-03) * s0_0_2;
	r1 += M4(-3.324e-03, 1.716e-02, -8.193e-03, 4.239e-01, 2.290e-02, -1.077e-03, 1.717e-02, -2.584e-02, -1.643e-02, -1.581e-02, -2.158e-02, -4.049e-01, 1.624e-02, -5.501e-02, -5.168e-03, 3.887e-01) * s0_0_2;
	r2 += M4(4.559e-02, -7.539e-03, -3.386e-02, -2.729e-02, 1.681e-02, 2.622e-02, 2.773e-02, 2.335e-02, -4.985e-02, 1.771e-03, -6.392e-03, -7.813e-03, 3.071e-02, -9.119e-03, 3.889e-02, 1.729e-02) * s0_0_2;
	r0 += M4(-1.303e-01, 7.322e-02, 1.486e-01, -2.015e-02, -3.334e-02, -6.595e-02, 4.256e-03, -4.319e-04, -6.904e-02, -5.448e-01, -3.640e-02, 1.534e-02, -7.820e-02, 4.941e-02, 3.545e-02, -4.957e-03) * s0_1_0;
	r1 += M4(3.490e-02, 6.697e-03, -5.771e-02, 1.209e-02, -1.614e-02, -3.796e-02, 1.150e-02, 6.400e-04, -8.336e-03, 1.151e-02, 1.097e-02, 1.155e-02, 2.377e-03, -2.419e-02, 1.113e-02, -4.185e-02) * s0_1_0;
	r2 += M4(-2.782e-02, 2.789e-01, -3.402e-02, -4.058e-02, -6.228e-02, -5.278e-03, 1.551e-03, 3.703e-02, 9.093e-02, 8.610e-02, 1.501e-03, -2.190e-02, -1.130e-03, -2.640e-01, 6.116e-03, -1.280e-01) * s0_1_0;
	r0 += M4(2.604e-01, 2.005e-01, -3.397e-02, 2.805e-01, -2.227e-01, 1.429e-02, 5.247e-02, 6.185e-02, 3.373e-02, -8.900e-01, -5.460e-02, -5.173e-03, 7.049e-01, 2.871e-01, 7.885e-02, -5.190e-02) * s0_1_1;
	r1 += M4(1.068e-01, 1.891e-02, -7.490e-02, -3.058e-02, -6.454e-02, 4.509e-02, -1.557e-01, 3.525e-02, -6.641e-02, -9.201e-02, -1.386e-01, -9.867e-03, -2.018e-01, 1.384e-01, 6.531e-02, -1.023e-01) * s0_1_1;
	r2 += M4(4.927e-02, 5.651e-02, -7.970e-02, -8.271e-02, -6.685e-02, 3.871e-01, 2.153e-02, -1.680e-01, -3.649e-01, -6.325e-02, -1.616e-02, 2.379e-02, 2.803e-01, 8.858e-02, 9.627e-02, 6.113e-01) * s0_1_1;
	r0 += M4(-4.329e-02, -5.510e-02, -5.088e-02, -7.058e-02, 9.511e-03, -5.436e-01, 8.891e-03, 7.212e-02, -2.420e-01, -3.438e-01, -2.412e-03, -2.910e-02, -1.419e-01, 1.076e-01, -2.354e-02, 7.885e-02) * s0_1_2;
	r1 += M4(-9.927e-03, 1.326e-02, 4.002e-01, -4.837e-02, -7.056e-02, -1.425e-03, -2.391e-01, -1.967e-01, 3.215e-02, 3.060e-02, -2.270e-01, -4.467e-02, 1.994e-01, 1.753e-02, 2.529e-01, 3.715e-03) * s0_1_2;
	r2 += M4(4.637e-02, 1.411e-03, -6.074e-03, 1.729e-02, 1.012e-01, -8.923e-03, 1.985e-01, 1.786e-01, -6.503e-02, -4.846e-03, -4.341e-03, -6.977e-03, -7.721e-02, 1.506e-02, -1.020e-02, 2.270e-02) * s0_1_2;
	r0 += M4(2.178e-02, -3.058e-02, 4.409e-02, 2.763e-02, 3.069e-02, -1.104e-01, -1.525e-03, -1.588e-02, -1.875e-01, -1.277e-01, 4.002e-02, -2.435e-02, -2.711e-01, 7.532e-02, 1.213e-01, 1.981e-03) * s0_2_0;
	r1 += M4(1.942e-02, 1.218e-02, 1.546e-02, 1.633e-04, 6.155e-03, -4.943e-03, 1.019e-02, 1.428e-02, -1.564e-02, -2.092e-04, -3.352e-02, 6.320e-03, -3.450e-02, -2.069e-03, -1.647e-02, 2.029e-02) * s0_2_0;
	r2 += M4(-1.388e-04, -3.612e-02, -5.749e-03, 9.472e-04, 7.981e-02, 3.135e-02, 8.866e-03, 6.549e-04, -7.960e-02, -3.937e-02, 2.585e-03, 6.597e-03, -3.665e-02, 1.313e-01, -9.885e-03, 3.584e-02) * s0_2_0;
	r0 += M4(-1.608e-01, -6.827e-02, 4.984e-02, -2.335e-01, -1.807e-02, 1.431e-02, 7.023e-01, -1.364e-02, -3.341e-02, -5.102e-01, -3.316e-02, 4.684e-02, 1.958e-01, 3.830e-02, -4.008e-02, 8.448e-02) * s0_2_1;
	r1 += M4(-1.516e-02, -1.378e-03, 4.735e-02, 1.380e-02, -3.745e-02, -3.163e-02, 8.181e-02, -2.203e-03, 2.516e-02, 2.748e-02, -1.845e-01, -6.395e-02, 1.115e-02, -8.676e-02, -8.958e-02, 1.069e-02) * s0_2_1;
	r2 += M4(3.821e-02, -2.563e-02, -1.550e-02, -1.992e-03, 7.265e-03, 3.135e-01, 2.696e-03, 3.815e-02, -2.888e-01, 1.219e-02, -6.569e-03, -5.837e-03, 4.474e-02, -7.300e-02, -1.666e-03, -9.421e-04) * s0_2_1;
	r0 += M4(9.614e-02, -7.065e-02, -1.068e-03, 5.048e-02, 2.760e-02, 1.140e-02, 1.405e-02, 7.148e-01, -1.548e-01, 6.692e-02, -2.072e-03, -2.730e-02, -5.501e-02, 3.049e-03, -7.432e-03, -6.665e-02) * s0_2_2;
	r1 += M4(-3.357e-02, -2.710e-02, -1.585e-02, 2.796e-02, -3.431e-02, -9.260e-02, -2.066e-01, 2.354e-02, -1.212e-02, -1.969e-02, -2.104e-01, -1.903e-02, 8.048e-03, 8.106e-03, -6.624e-03, 2.266e-02) * s0_2_2;
	r2 += M4(2.091e-02, 3.723e-03, -8.364e-03, -1.432e-03, -1.984e-02, -1.728e-02, -1.426e-02, -9.034e-03, -1.929e-02, 3.454e-03, -6.361e-03, -7.978e-03, -1.867e-01, 6.408e-04, 1.926e-03, 2.983e-04) * s0_2_2;
	// Clamp each accumulator to [0, 1] and write it, widened to vec4, to the
	// output texels opos + (0, 0), (1, 0) and (2, 0).
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_4x12_SOFT] -out-shuffle
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND conv4
//!BIND LUMA
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 1
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Pass "out-shuffle": binds the conv4 texture and LUMA and produces a
// 1-component result that is twice as wide and twice as tall as LUMA.
// There is no SAVE directive here; per the mpv user-shader documentation the
// result then replaces the hooked texture (LUMA).
// The WHEN expression enables the pass only if OUTPUT is more than 1.2x LUMA
// in both width and height.

// Precision selection: when the float16 extension macro is defined, V4/M4/F
// are half-precision types; otherwise they are the ordinary 32-bit ones.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): fetch one conv4 texel for the position pos + (x, y), with
// that position clamped to [0, sz]. The clamped position is scaled by (3, 1)
// and lK adds K to the x coordinate, so the three macros read three
// horizontally adjacent texels. The fetched value is multiplied by conv4_mul
// and converted to V4. `pos` and `sz` are locals of hook(), so these macros
// are only usable inside it.
#define l0(x, y) V4((conv4_mul * texelFetch(conv4_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv4_mul * texelFetch(conv4_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv4_mul * texelFetch(conv4_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Work-group shared tile: 3 planes (one per fetch macro) of 10x10 V4 values,
// filled and read by hook().
shared V4 G[3][10][10];
// Entry point of the out-shuffle pass.
// Each invocation handles one position `pos`: it reads a 3x3 neighbourhood
// from each of the three planes of the shared tile G, accumulates
// M4 * V4 products plus a constant vector into r0, and writes the four
// components of r0 -- each added to a LUMA sample -- to a 2x2 quad of
// out_image.
// The M4 constants are presumably trained network weights emitted by a
// generator (not verifiable from this file) -- do not edit them by hand.
// The statement order of the accumulation is significant for the rounded
// result, especially with the half-precision types, so keep it as-is.
void hook() {
	// xy: index of this invocation inside its work group.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// pos: work-group origin (8 positions per group in each axis) plus xy.
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// opos: top-left texel of the 2x2 output quad written by this invocation.
	ivec2 opos = pos * ivec2(2, 2);
	// sz: LUMA_size - 1, the upper clamp bound used by l0/l1/l2.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the shared tile. Each invocation visits tile indices
	// (xy.y + {0, 8}, xy.x + {0, 8}) and skips those that are >= 10.
	// Tile index (ay, ax) holds the sample for pos + (x - 1, y - 1), i.e. the
	// tile is shifted by one position up/left relative to the work group.
	// NOTE(review): full 10x10 coverage assumes 8x8 invocations per group, as
	// the last two numbers of the COMPUTE directive suggest -- confirm.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// Synchronise the work group before reading entries of G that were
	// written by other invocations.
	barrier();
	// sP_R_C: sample of plane P at tile row xy.y + R, column xy.x + C.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	// Single accumulator; its x/y/z/w components feed the four output texels.
	V4 r0;
	r0 = V4(0.0);
	// Load the 3x3 neighbourhoods of planes 0 (s0_*) and 1 (s1_*).
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 and plane 1 contributions: one M4 per sample.
	r0 += M4(8.381e-03, -5.337e-04, -9.637e-03, 2.370e-04, -3.788e-02, -7.526e-03, -2.010e-02, 2.584e-03, -5.980e-06, -6.376e-06, 2.881e-04, -4.844e-06, 5.442e-02, -2.618e-03, 5.506e-02, -3.089e-03) * s0_0_0;
	r0 += M4(-1.519e-01, 3.500e-03, 1.139e-02, -4.555e-03, -7.608e-02, -9.172e-02, -8.467e-02, -7.249e-02, 2.588e-01, -1.100e-02, 1.444e-01, -8.324e-05, 4.199e-02, -9.560e-01, 7.323e-03, 1.141e-01) * s0_0_1;
	r0 += M4(-2.762e-03, -2.367e-02, 2.233e-03, -2.144e-03, 4.594e-04, -2.394e-02, 5.215e-03, -1.114e-02, -6.538e-03, 5.193e-01, -5.215e-02, 8.818e-02, 8.528e-04, 2.690e-02, -3.972e-06, 5.147e-03) * s0_0_2;
	r0 += M4(4.089e-02, 2.765e-03, 5.188e-02, 3.472e-03, -1.266e-02, -2.785e-02, -2.285e-02, -3.300e-02, -7.237e-07, -5.050e-06, -2.955e-04, 2.069e-05, -3.212e-03, 3.406e-03, 3.792e-02, -2.581e-03) * s0_1_0;
	r0 += M4(-1.351e-01, 2.158e-01, -5.752e-01, 9.589e-02, 2.222e-01, 1.703e-01, 1.753e-01, 1.255e-01, 8.627e-04, -1.064e-03, 7.367e-02, -1.149e-03, -3.564e-03, -3.291e-03, 1.948e-03, -1.282e-02) * s0_1_1;
	r0 += M4(2.775e-03, -5.529e-02, -1.788e-04, -2.666e-02, -2.559e-02, 2.714e-03, -4.446e-02, -8.291e-03, 3.263e-03, -6.336e-03, 1.997e-01, 2.229e-01, 3.700e-04, -2.392e-03, -8.699e-04, 2.948e-02) * s0_1_2;
	r0 += M4(-2.865e-04, -2.977e-05, 1.682e-02, 8.777e-04, -6.114e-04, -5.408e-04, -1.016e-02, -7.532e-03, -1.440e-07, 9.155e-07, 2.887e-06, 5.509e-08, -2.590e-05, 8.857e-06, 2.010e-04, 1.499e-04) * s0_2_0;
	r0 += M4(-3.860e-04, -4.749e-03, 3.747e-02, 4.636e-02, -1.657e-03, -1.134e-02, 1.239e-02, 9.712e-03, 7.930e-07, -5.469e-06, -9.479e-06, -3.160e-07, 2.889e-05, -1.613e-05, -2.628e-05, 4.986e-04) * s0_2_1;
	r0 += M4(1.210e-03, -1.006e-03, -3.222e-04, -3.006e-02, 7.282e-04, -1.978e-03, -4.794e-04, -6.402e-03, -4.035e-06, 1.094e-05, -1.851e-04, -3.224e-04, 1.356e-06, -1.393e-08, 3.265e-05, -3.021e-04) * s0_2_2;
	r0 += M4(1.373e-03, -8.676e-04, 1.236e-04, 2.623e-04, 6.814e-02, 1.823e-02, 4.164e-04, 3.275e-04, 1.607e-01, 1.338e-02, -7.112e-03, -2.358e-03, -4.494e-03, -3.196e-04, -4.995e-04, -2.058e-04) * s1_0_0;
	r0 += M4(-1.103e-02, -2.700e-04, -5.051e-04, -2.087e-04, -3.135e-03, 5.636e-02, 1.259e-03, 1.324e-03, 2.192e-03, 5.323e-02, -2.679e-04, -3.847e-03, -6.637e-05, 1.071e-04, 5.012e-05, -1.936e-04) * s1_0_1;
	r0 += M4(-1.252e-03, -1.260e-03, -1.463e-04, -4.087e-04, 9.170e-04, -6.782e-04, 3.729e-04, 1.390e-03, -2.714e-04, 8.577e-04, -1.332e-04, -6.069e-04, -2.919e-05, -1.657e-05, -6.048e-06, 2.733e-06) * s1_0_2;
	r0 += M4(-1.386e-01, -2.206e-03, -1.812e-01, 1.075e-02, -9.003e-02, -1.747e-02, -2.738e-02, -4.402e-03, -8.890e-02, -2.316e-01, 6.133e-01, -1.119e-01, -5.315e-02, -4.190e-02, 6.775e-03, -2.593e-03) * s1_1_0;
	r0 += M4(2.000e-01, 4.070e-01, 2.753e-02, -1.822e-01, -1.475e-01, 2.111e-01, -4.948e-02, 5.193e-01, 1.139e-02, 4.071e-02, 5.158e-03, 1.075e-01, -3.948e-03, -1.809e-02, 1.605e-04, 4.589e-03) * s1_1_1;
	r0 += M4(7.022e-03, 4.761e-02, 6.135e-04, 3.339e-02, -5.708e-03, -3.821e-02, -9.693e-04, -3.210e-02, 5.091e-04, 7.779e-05, 1.067e-05, -8.435e-04, 3.538e-04, -9.539e-04, 1.884e-04, 1.050e-03) * s1_1_2;
	r0 += M4(-3.169e-03, 4.554e-03, 7.057e-02, -5.446e-04, -3.407e-03, -5.521e-04, 2.238e-02, 2.842e-03, 4.899e-03, -6.446e-04, -2.873e-02, -6.271e-02, 5.456e-01, -1.123e-02, -2.007e-01, -1.630e-01) * s1_2_0;
	r0 += M4(7.789e-04, 9.922e-04, 1.265e-01, 2.307e-01, 2.212e-04, -2.183e-03, -7.679e-02, -1.063e-03, -4.404e-04, 6.865e-03, 7.764e-03, 1.062e-02, 5.370e-04, 9.055e-02, -3.345e-03, 8.346e-03) * s1_2_1;
	r0 += M4(-8.093e-05, 1.862e-03, 1.757e-03, 2.693e-02, 5.219e-05, -1.210e-03, -1.251e-03, -1.923e-02, -1.688e-05, 1.096e-03, 2.278e-04, 2.987e-03, 4.484e-04, 4.695e-03, 1.452e-05, 3.523e-03) * s1_2_2;
	// The s0_* variables are reused here: they now hold the 3x3 neighbourhood
	// of plane 2, and the following block adds that plane's contributions.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(6.456e-02, 4.681e-04, 3.202e-04, -1.882e-05, -6.875e-06, -5.255e-05, 1.545e-04, 5.690e-06, 1.725e-03, 4.132e-05, 6.705e-06, -4.449e-06, -2.599e-03, -5.443e-05, -8.268e-05, 3.088e-06) * s0_0_0;
	r0 += M4(5.872e-02, 1.685e-01, 3.258e-03, 2.991e-03, 1.007e-01, -3.312e-03, -8.997e-03, 2.455e-03, -4.427e-03, -2.153e-04, -4.509e-05, 1.085e-04, 1.187e-02, 4.558e-04, 5.171e-04, -9.367e-06) * s0_0_1;
	r0 += M4(5.592e-03, 1.178e-02, -4.755e-04, 1.953e-04, 3.009e-02, 1.256e-01, -2.144e-03, 5.952e-04, 2.382e-03, -2.709e-05, 3.857e-05, -1.039e-04, -1.050e-03, 9.102e-04, 9.290e-05, 3.574e-04) * s0_0_2;
	r0 += M4(1.351e-02, 7.860e-03, 1.081e-01, -2.916e-03, -6.245e-05, 2.346e-05, -7.056e-04, 1.589e-05, 1.186e-02, -2.904e-04, 2.542e-03, -1.232e-03, -4.194e-03, 3.500e-04, -3.853e-03, 5.538e-04) * s0_1_0;
	r0 += M4(-2.272e-02, -2.793e-02, 1.567e-01, 4.697e-01, 2.549e-02, 1.413e-02, 1.131e-01, 4.555e-03, 4.007e-01, 1.808e-01, -8.867e-05, -5.964e-03, -6.432e-01, -9.839e-02, 2.160e-02, -7.575e-03) * s0_1_1;
	r0 += M4(-1.781e-03, 5.410e-03, 8.707e-03, 2.142e-02, -9.048e-03, 1.859e-02, 7.006e-02, -8.383e-01, 2.393e-02, 2.344e-01, 2.089e-03, 3.004e-03, -3.117e-02, -3.487e-01, -5.075e-03, -1.443e-02) * s0_1_2;
	r0 += M4(-2.369e-03, 2.750e-03, -6.362e-04, -5.306e-03, 6.061e-05, 1.398e-04, 4.214e-04, -3.922e-05, -6.393e-03, -1.922e-03, 4.114e-02, -4.092e-03, 3.135e-03, 1.042e-03, -2.162e-02, 2.755e-03) * s0_2_0;
	r0 += M4(5.106e-04, -9.120e-04, -1.826e-02, -2.388e-02, 2.637e-04, -1.808e-04, 8.345e-03, 6.624e-04, 2.386e-05, -1.386e-02, -6.949e-01, 9.170e-03, -4.652e-04, -3.560e-03, -5.589e-02, -2.736e-03) * s0_2_1;
	r0 += M4(-2.040e-04, -2.362e-03, -6.317e-04, -4.799e-03, -3.691e-05, -7.701e-05, -1.225e-02, -4.401e-03, -3.349e-03, -3.404e-03, 3.524e-02, -2.163e-02, 1.077e-03, 3.603e-03, -3.162e-02, -1.067e-01) * s0_2_2;
	// Constant per-component offset added after all M4 * sample products.
	r0 += V4(4.275e-11, 3.228e-10, 1.508e-08, 5.893e-11);
	// opt is half of LUMA_pt; fpos is (opos + 0.5) scaled by opt.
	// NOTE(review): presumably LUMA_pt is the normalised size of one LUMA
	// texel, making fpos the centre of output texel opos in LUMA's
	// normalised coordinates -- confirm against the mpv documentation.
	vec2 opt = 0.5 * LUMA_pt;
	vec2 fpos = (vec2(opos) + vec2(0.5)) * opt;
	// Write the 2x2 quad: r0.x -> (0, 0), r0.y -> (1, 0), r0.z -> (0, 1),
	// r0.w -> (1, 1). Each value is added to the red channel of LUMA sampled
	// at the matching offset from fpos and stored in the first component;
	// the results are not clamped here.
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0.x + LUMA_tex(fpos + vec2(0.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r0.y + LUMA_tex(fpos + vec2(1.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r0.z + LUMA_tex(fpos + vec2(0.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r0.w + LUMA_tex(fpos + vec2(1.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
}
